import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FormatStrFormatter
# Import library
from scipy import stats
from scipy.stats import spearmanr, pearsonr, linregress
from scipy.stats import chi2_contingency
from scipy.stats import ks_2samp
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
import Rbeast as rb
import piecewise_regression
footnotes_df = pd.read_csv('../../Footnote article/Data Manifests/ECCOFictionTXTLab.csv', encoding='utf-8', parse_dates=True)
#footnotes_df = pd.read_csv('../../../../Dropbox/Footnote article/Data Manifests/ECCOFictionTXTLab.csv', encoding='utf-8', parse_dates=True)
This first section of visualizations aim to give an overview of our dataset as a whole. As such, they use only the ECCOFictionTXTLab.csv.
The footnote-detection models were run on an an initial corpus of 8607 volumes of fiction comprising 5686 works of fiction (8607 documents, 5685 ESTC_ids.) See list in ECCOFictionTXTLab.csv. These works were identifed in ECCO using a classifier built by txtLab collaborators based on the Stanford/Mark Algee-Hewitt list of 1579 works of fiction in ECCO.
Filename: name of XML file for volume
DocumentID: ECCO identifier
ESTC_ID: EST identifier
Date: Date of publication
Title: Title of work
Vol_Number: Volume number
Author : Author Name
Imprint : Publication imprint information
Field_Headings : ECCO subject headings
fn_pages: number of pages with footnotes predicted by the model's four machine learning and classification algorithms
nofn_pages: number of pages with no footnotes predicted by the model's four machine learning and classification algorithms
TableName: Name of metadata table
fn.percent: percentage of pages with footnotes predicted
# Let's look at the first 5 rows of ECCOFictionTXTLab.csv data
footnotes_df.head(5)
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | fn_pages | nofn_pages | TableName | fn.percent | decade | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0003900900.xml | 3900900 | T110586 | 1753 | Memoires secrets pour servir <c3><a0> l'histoi... | 0 | M. M. C. F., Ecu<c3><a8>ier | London : printed for R. Jennys, [1753?]. | French fiction, 18th century | 0 | 30 | Manifest_HistAndGeo | 0.000000 | 1750 |
| 1 | 2 | 0004700401.xml | 4700401 | T125094 | 1748 | Anecdotes de la cour de Fran<c3><a7>ois I. Par... | Volume 1 | Lussan, Marguerite de | Londres [i.e. Paris?] : chez Jean Nours [sic],... | France, Court and courtiers, France, History, ... | 0 | 304 | Manifest_HistAndGeo | 0.000000 | 1740 |
| 2 | 3 | 0004700402.xml | 4700402 | T125094 | 1748 | Anecdotes de la cour de Fran<c3><a7>ois I. Par... | Volume 2 | Lussan, Marguerite de | Londres [i.e. Paris?] : chez Jean Nours [sic],... | France, Court and courtiers, France, History, ... | 1 | 343 | Manifest_HistAndGeo | 0.002907 | 1740 |
| 3 | 4 | 0004700403.xml | 4700403 | T125094 | 1748 | Anecdotes de la cour de Fran<c3><a7>ois I. Par... | Volume 3 | Lussan, Marguerite de | Londres [i.e. Paris?] : chez Jean Nours [sic],... | France, Court and courtiers, France, History, ... | 0 | 309 | Manifest_HistAndGeo | 0.000000 | 1740 |
| 4 | 5 | 0007901400.xml | 7901400 | T069692 | 1782 | The history of the civil wars in Germany, from... | 0 | Defoe, Daniel | Newark : printed by James Tomlinson, for the p... | Thirty Years' War, 1618-1648, Fiction, Great B... | 2 | 379 | Manifest_HistAndGeo | 0.005249 | 1780 |
# Add a new column to the dataframe called "Footnotes_predicted", which will read "True" if fn_pages are not 0
footnotes_df.loc[footnotes_df['fn_pages'] > 0, 'Footnotes_predicted'] = 'True'
# Fix a date error
footnotes_df = footnotes_df.replace(to_replace=5480,
value=1710)
footnotes_df['Date'].max()
1809
# Plot a time series of the total volumes in our dataset vs. the total volumes identified as having footnotes
# so that we can compare trends
footnotes_df.groupby('Date')[['DocumentID', 'Footnotes_predicted']].count().plot(title="Number of volumes in corpus vs number of volumes with foontotes predicted", figsize=(16,8))\
.set_ylabel("Number of volumes")
plt.legend(["Total volumes", "Volumes with footnotes predicted"])
<matplotlib.legend.Legend at 0x13b2c8bd0>
print("Number of volumes in our ECCO dataset:")
footnotes_df['DocumentID'].count()
Number of volumes in our ECCO dataset:
8606
# Calculate the total number of volumes with footnotes present, according to the machine learning classification algorithms
print("Total number of volumes with at least one page of predicted footnotes,")
print("(according to the machine learning and classification algorithms):")
footnotes_df.Footnotes_predicted.count()
Total number of volumes with at least one page of predicted footnotes, (according to the machine learning and classification algorithms):
4260
# Plot a time series of the total number of footnoted pages in all volumes
footnotes_df.groupby('Date')['fn_pages'].sum().plot(title="Total number of pages with footnotes predicted in all volumes, by date", figsize=(16,8)).set_ylabel("Number of pages")
Text(0, 0.5, 'Number of pages')
# Plot a time series (by decade, not year), of the total number of footnoted pages in all volumes
footnotes_df.groupby('decade')['fn_pages'].sum().plot(title="Total number of pages with footnotes predicted in all volumes, by decade", figsize=(16,8)).set_ylabel("Number of pages")
Text(0, 0.5, 'Number of pages')
footnotes_df.groupby('decade')['DocumentID'].count()
decade 1690 1 1700 149 1710 245 1720 380 1730 325 1740 445 1750 721 1760 1018 1770 1182 1780 1611 1790 2138 1800 391 Name: DocumentID, dtype: int64
footnotes_df.groupby('decade')['DocumentID'].count().plot(kind='bar')
<Axes: xlabel='decade'>
# Plot a time series of the average percentage of footnoted pages, per volume
footnotes_df.groupby('Date')['fn.percent'].mean().plot(title="Average percentage of pages with footnotes predicted, per volume", figsize=(16,8)).set_ylabel("Percentage (as decimal)")
Text(0, 0.5, 'Percentage (as decimal)')
# Plot a time series (by decade) of the average percentage of footnoted pages, per volume
footnotes_df.groupby('decade')['fn.percent'].mean().plot(title="Average percentage of pages with footnotes predicted, per volume", figsize=(16,8)).set_ylabel("Percentage (as decimal)")
Text(0, 0.5, 'Percentage (as decimal)')
# Plot the number of volumes by respective number of footnoted pages
footnotes_df.groupby('fn_pages')['DocumentID'].count().plot(kind='bar', title="Number of pages with foonotes predicted, by volumes", figsize=(16,8)).set_ylabel("Number of volumes")
Text(0, 0.5, 'Number of volumes')
# Plot the number of volumes by percentage of footnoted pages (using a historgram to bin percentages)
fig, ax = plt.subplots(figsize = (6,4))
footnotes_df['fn.percent'].plot(kind = "hist", bins=50, figsize=(16,8), \
title="Percentage of pages with footnotes predicted, by volume", \
grid=True).set_ylabel("Number of volumes")
plt.show()
# Plot the number of volumes by number of footnoted pages (using a historgram to bin the number)
footnotes_df['fn_pages'].plot(kind='hist', bins=50, figsize=(16,8), \
title="Number of pages with footnotes predicted, by volume", \
grid=True).set_ylabel("Number of volumes")
Text(0, 0.5, 'Number of volumes')
# Write a function to identify only the rows with footnotes present
def just_footnotes(row):
if row['fn_pages'] > 0:
return row['fn_pages']
else:
return ''
# Use our function to add a new column to our dataframew with just the rows with foonotes
footnotes_df['just_footnotes'] = footnotes_df.apply(just_footnotes, axis=1)
footnotes_df[['just_footnotes']] = footnotes_df[['just_footnotes']].apply(pd.to_numeric)
footnotes_df['just_footnotes'].dtype
dtype('float64')
# Using the new column we created
# Plot the distribution of footnoted pages in volumes with footnotes present
footnotes_df['just_footnotes'].plot(kind='hist', bins=100, figsize=(16,8), \
title="Number of pages with footnotes predicted, by volume, excluding volumes with 0 footnotes", \
grid=True).set_ylabel("Number of volumes")
Text(0, 0.5, 'Number of volumes')
footnotes_df.hist('fn_pages', bins=[1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 25, 30, 35, 40, 45, 50, 55, 65, 70, 75, 80, 85, 100, 105, 110, 115, 120, 125, 130, 135, 140, 145, 150, 155, 160, 170, 180, 190, 200, 220, 240, 250, 300, 350], figsize=(16,8))
plt.suptitle('Number of pages with footnotes predicted, by volume (excluding volumes with 0 footnotes)', x=0.5, y=1.0, ha='center', fontsize='xx-large')
fig.text(0.5, 0.04, 'common X', ha='center')
fig.text(0.04, 0.5, 'common Y', va='center', rotation='vertical')
Text(0.04, 0.5, 'common Y')
mean_number_of_pages = np.mean(footnotes_df['fn_pages'])
mean_number_of_pages
5.221240994654892
median_number_of_pages = np.nanmedian(footnotes_df['fn_pages'])
median_number_of_pages
0.0
median_number_of_pages_excluding_0 = np.nanmedian(footnotes_df['just_footnotes'])
median_number_of_pages_excluding_0
2.0
mean_percent_of_pages = np.mean(footnotes_df['fn.percent'])
mean_percent_of_pages
0.017668493307886692
median_percent_of_pages = np.nanmedian(footnotes_df['fn.percent'])
median_percent_of_pages
0.0
mode = stats.mode(footnotes_df['fn_pages'])
print("The modal value is {} with a count of {}".format(mode.mode[0], mode.count[0]))
The modal value is 0 with a count of 4346
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/271863990.py:1: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning. mode = stats.mode(footnotes_df['fn_pages'])
# Let's look at the footnote percentage breakdown
footnote_percentages = []
for footnote_percentage in footnotes_df['fn.percent']:
if footnote_percentage == 0:
footnote_percentages.append("0% footnotes")
elif type(footnote_percentage) == float:
if 0.001 > footnote_percentage > 0:
footnote_percentages.append("0-0.1% footnotes")
elif 0.006 > footnote_percentage >= 0.001:
footnote_percentages.append("0.1-0.6% footnotes")
elif 0.10 > footnote_percentage >= 0.006:
footnote_percentages.append("0.6-10% footnotes")
elif 0.25 > footnote_percentage >= 0.10:
footnote_percentages.append("10-25% footnotes")
elif footnote_percentage >= 0.25:
footnote_percentages.append("25+% footnotes")
else:
footnote_percentages.append("0% footnotes")
all_footnote_percentage_sets_df = pd.DataFrame (footnote_percentages, columns = ['footnote_percentages'])
print("Footnote percentage categories in ECCO data:")
all_footnote_percentage_sets_df['footnote_percentages'].value_counts()
Footnote percentage categories in ECCO data:
0% footnotes 4346 0.6-10% footnotes 2553 0.1-0.6% footnotes 1292 10-25% footnotes 329 25+% footnotes 86 Name: footnote_percentages, dtype: int64
all_footnote_percentage_sets_df['footnote_percentages'].value_counts().plot(kind="pie", autopct='%.2f', title="Footnote percentages in general dataset")
<Axes: title={'center': 'Footnote percentages in general dataset'}, ylabel='footnote_percentages'>
#mean_number_of_pages = np.nanmean(footnotes_df['just_footnotes'])
#mean_number_of_pages
mean_percent_of_pages = np.nanmean(footnotes_df['fn.percent'])
mean_percent_of_pages
0.017668493307886692
median_percent_of_pages = np.nanmedian(footnotes_df['fn.percent'])
median_percent_of_pages
0.0
genre_term = "memoir|Memoir"
dataframe = footnotes_df
print(f"Number of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_ECCO_memoirs = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the ECCO CORPUS whose titles contain the words 'memoir|Memoir': 481 Percent of volumes in the ECCO CORPUS whose titles contain the words 'memoir|Memoir': 0.055891238670694864
genre_term = "letter|Letter|lettre|Lettre"
dataframe = footnotes_df
print(f"Number of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_ECCO_letters = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the ECCO CORPUS whose titles contain the words 'letter|Letter|lettre|Lettre': 1144 Percent of volumes in the ECCO CORPUS whose titles contain the words 'letter|Letter|lettre|Lettre': 0.13293051359516617
genre_term = "tale|Tale|recit|Recit"
dataframe = footnotes_df
print(f"Number of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_ECCO_tale = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the ECCO CORPUS whose titles contain the words 'tale|Tale|recit|Recit': 663 Percent of volumes in the ECCO CORPUS whose titles contain the words 'tale|Tale|recit|Recit': 0.0770392749244713
genre_term = "novel|Novel|roman|Roman"
dataframe = footnotes_df
print(f"Number of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_ECCO_novel = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the ECCO CORPUS whose titles contain the words 'novel|Novel|roman|Roman': 1046 Percent of volumes in the ECCO CORPUS whose titles contain the words 'novel|Novel|roman|Roman': 0.12154310945851732
genre_term = "romance|Romance"
dataframe = footnotes_df
print(f"Number of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_ECCO_romance = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the ECCO CORPUS whose titles contain the words 'romance|Romance': 157 Percent of volumes in the ECCO CORPUS whose titles contain the words 'romance|Romance': 0.018243086218917036
genre_term = "history|History|histories|Histories|histoire|Histoire"
dataframe = footnotes_df
print(f"Number of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_ECCO_history = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the ECCO CORPUS whose titles contain the words 'history|History|histories|Histories|histoire|Histoire': 1632 Percent of volumes in the ECCO CORPUS whose titles contain the words 'history|History|histories|Histories|histoire|Histoire': 0.18963513827562167
genre_term = "adventure|Adventures"
dataframe = footnotes_df
print(f"Number of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the ECCO CORPUS whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_ECCO_adventure = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the ECCO CORPUS whose titles contain the words 'adventure|Adventures': 976 Percent of volumes in the ECCO CORPUS whose titles contain the words 'adventure|Adventures': 0.113409249360911
# Proportions of Memoirs, Letters, Tales
proportions_ECCO = [proportion_ECCO_memoirs, proportion_ECCO_letters, proportion_ECCO_tale]
proportions_ECCO
[0.055891238670694864, 0.13293051359516617, 0.0770392749244713]
proportions_ECCO_full = [proportion_ECCO_memoirs, proportion_ECCO_letters, proportion_ECCO_tale, \
proportion_ECCO_novel, proportion_ECCO_romance, proportion_ECCO_history, \
proportion_ECCO_adventure]
genre_titles = ["Memoirs", "Letters", "Tales", "Novel", "Romance", "History", "Adventure"]
proportions_ECCO_full= pd.DataFrame(zip(proportions_ECCO_full, genre_titles), columns = ['Proportion in Corpus', 'Genre'])
proportions_ECCO_full
| Proportion in Corpus | Genre | |
|---|---|---|
| 0 | 0.055891 | Memoirs |
| 1 | 0.132931 | Letters |
| 2 | 0.077039 | Tales |
| 3 | 0.121543 | Novel |
| 4 | 0.018243 | Romance |
| 5 | 0.189635 | History |
| 6 | 0.113409 | Adventure |
This data subset represents all volumes in ECCOFictionTXTLab.csv that contain one page with footnotes predicted. The individual page images for each of these pages was subsequently human-verified to validate the footnote prediction and count the number of footnotes present.
The subset was created from the ECCO-Footnote-Manifest-volumes-with-one-footnote.csv, using the human-verification data from researcehers.
fn_pages: number of pages with footnotes predicted by the model's four machine learning and classification algorithms
nofn_pages: number of pages with no footnotes predicted by the model's four machine learning and classification algorithms
TableName: Name of metadata table
fn.percent: percentage of pages with footnotes predicted
footnotes_present: number of footnotes present (human-verified)
# Create a new dataframe with the annotated footnotes
one_footnote_df = pd.read_csv('Human-Verified-ECCO-Footnote-Manifest-volumes-with-one-footnote.csv', encoding='utf-8', parse_dates=True)
# Let's look at the first 5 rows of our one-foonote subset
one_footnote_df.head(5)
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | fn_pages | nofn_pages | TableName | fn.percent | decade | footnotes_present | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 |
| 1 | 5099 | 0000100702.xml | 100702 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 2 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 261 | Manifest_LitAndLang2 | 0.003817 | 1770 | 0 |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 |
| 3 | 450 | 0000200500.xml | 200500 | T128705 | 1791 | Scelta di novelle di Giovanni Boccaccio, fatta... | 0 | Boccaccio, Giovanni | In Londra : presso Francesco Wingrave, success... | Italian fiction, 18th century | 1 | 306 | Manifest_LitAndLang1 | 0.003257 | 1790 | 0 |
| 4 | 453 | 0000200700.xml | 200700 | T128696 | 1741 | The decameron, or ten days entertainment of Bo... | 0 | Boccaccio, Giovanni | London : printed for R. Dodsley, at Tully's He... | Wood-engraving, English, Specimens, 17th century | 1 | 600 | Manifest_LitAndLang1 | 0.001664 | 1740 | 2 |
one_footnote_df['footnotes_present'] = one_footnote_df['footnotes_present'].astype(int)
one_footnote_df.groupby('Date')[['footnotes_present']].count().plot(title="Total volumes in our one footnote dataset", figsize=(16,8))\
.set_ylabel("Total number of works footnotes")
plt.legend(["Number of volumes"])
<matplotlib.legend.Legend at 0x13bfd2550>
one_footnote_df.groupby('decade')[['footnotes_present']].count().plot(title="Total volumes in our one footnote dataset", figsize=(16,8))\
.set_ylabel("Total number of works footnotes")
plt.legend(["Number of volumes"])
<matplotlib.legend.Legend at 0x13c213490>
one_footnote_df.groupby('Date')[['footnotes_present']].sum().plot(title="Number of volumes in one footnote dataset, by date", figsize=(16,8))\
.set_ylabel("Total number of works footnotes")
Text(0, 0.5, 'Total number of works footnotes')
one_footnote_df.plot.scatter(x = 'Date', y = 'footnotes_present', s=300, alpha=0.2, title="Number of volumes in one-footnote-subset in each footnote category, by date", figsize=(16,8)).set_ylabel("Number of footnotes")
Text(0, 0.5, 'Number of footnotes')
one_footnote_df['fn_pages'].dtype
dtype('int64')
ax = one_footnote_df.groupby('footnotes_present')['DocumentID'].count().plot(kind='bar', title="Total footnotes in one footnote dataset", figsize=(16,8))
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Number of volumes")
ax.set_ylabel("Number of footnotes")
Text(0, 0.5, 'Number of footnotes')
footnote_percentages_in_one_footnote_set = []
for footnote_percentage in one_footnote_df['fn.percent']:
if footnote_percentage == 0:
footnote_percentages_in_one_footnote_set.append("0% footnotes")
elif type(footnote_percentage) == float:
if 0.001 > footnote_percentage > 0:
footnote_percentages_in_one_footnote_set.append("0-0.1% footnotes")
elif 0.006 > footnote_percentage >= 0.001:
footnote_percentages_in_one_footnote_set.append("0.1-0.6% footnotes")
elif 0.10 > footnote_percentage >= 0.006:
footnote_percentages_in_one_footnote_set.append("0.6-10% footnotes")
elif 0.25 > footnote_percentage >= 0.10:
footnote_percentages_in_one_footnote_set.append("10-25% footnotes")
elif footnote_percentage >= 0.25:
footnote_percentages_in_one_footnote_set.append("25+% footnotes")
else:
footnote_percentages_in_one_footnote_set.append("0% footnotes")
one_footnote_percentage_sets_df = pd.DataFrame (footnote_percentages_in_one_footnote_set, columns = ['footnote_percentages'])
print("Footnote percentage categories in one-footnote subset data:")
one_footnote_percentage_sets_df['footnote_percentages'].value_counts()
Footnote percentage categories in one-footnote subset data:
0.1-0.6% footnotes 1156 0.6-10% footnotes 320 10-25% footnotes 8 Name: footnote_percentages, dtype: int64
one_footnote_percentage_sets_df['footnote_percentages'].value_counts().plot(kind='pie', autopct='%.2f', title="Breakdown of footnoted pages in one-footnote subset",)
<Axes: title={'center': 'Breakdown of footnoted pages in one-footnote subset'}, ylabel='footnote_percentages'>
one_footnote_df['footnotes_present'].value_counts().plot(kind='pie',autopct='%.2f', title="Categories of foonoted pages (from 0 to 5 footnotes)", figsize=(16,8))\
.set_ylabel("Categories of foonoted pages (from 0 to 5 footnotes)")
Text(0, 0.5, 'Categories of foonoted pages (from 0 to 5 footnotes)')
#Print the number of each footnote type
one_footnote_df['footnotes_present'].value_counts()
0 866 1 582 2 26 3 6 4 3 5 1 Name: footnotes_present, dtype: int64
one_footnote_df['footnotes_present'].count()
1484
# Print the percentage of each footnote category in our one footnote dataset
one_footnote_df['footnotes_present'].value_counts() / one_footnote_df['footnotes_present'].count() * 100
0 58.355795 1 39.218329 2 1.752022 3 0.404313 4 0.202156 5 0.067385 Name: footnotes_present, dtype: float64
one_footnote_df['Title'].value_counts()
The life and opinions of Tristram Shandy, gentleman. |... 11
A sentimental journey through France and Italy. By Mr. Yorick. ... 8
The adventures of Roderick Random. In two volumes. ... 8
The rambler. In four volumes. ... 7
The adventures of Peregrine Pickle. In which are included, Memoirs of a lady of quality. In four volumes. ... 7
..
The (latin) description of Hogland: with its dedication: imitated in English. 1
Noah. Attempted from the German of Mr. Bodmer. In twelve books. By Joseph Collyer. ... 1
La belle assembl<c3><a9>e: being a curious collection of some very remarkable incidents which happened to persons of the first quality in France. Interspers'd with Entertaining and Improving Observations made by them on several Passages in History, both Ancient and Modern. Written in French for the Entertainment of the King, and dedicated to him by Madam de Gomez. Adorn'd with copper-plates. Vol. III. 1
The libertine. A novel. In a series of letters. By James Bacon. 1
The guardian. In two volumes. To which is added, a translation of the mottos and quotations: together with the arguments, and writers names of such as are known, prefixed to each paper. Not in any other edition. ... 1
Name: Title, Length: 1193, dtype: int64
one_footnote_df.describe()
| Unnamed: 0 | DocumentID | Date | fn_pages | nofn_pages | fn.percent | decade | footnotes_present | |
|---|---|---|---|---|---|---|---|---|
| count | 1484.000000 | 1.484000e+03 | 1484.000000 | 1484.0 | 1484.000000 | 1484.000000 | 1484.000000 | 1484.000000 |
| mean | 4058.925876 | 5.871330e+08 | 1772.935984 | 1.0 | 239.410377 | 0.007124 | 1768.463612 | 0.450809 |
| std | 2501.738625 | 4.469581e+08 | 23.377850 | 0.0 | 106.881887 | 0.012219 | 23.369226 | 0.587237 |
| min | 3.000000 | 1.007010e+05 | 1697.000000 | 1.0 | 6.000000 | 0.001287 | 1690.000000 | 0.000000 |
| 25% | 1840.750000 | 3.280028e+07 | 1761.000000 | 1.0 | 177.000000 | 0.003300 | 1760.000000 | 0.000000 |
| 50% | 3931.000000 | 6.603501e+08 | 1778.000000 | 1.0 | 241.000000 | 0.004132 | 1770.000000 | 0.000000 |
| 75% | 6069.500000 | 9.898002e+08 | 1792.000000 | 1.0 | 302.000000 | 0.005618 | 1790.000000 | 1.000000 |
| max | 8606.000000 | 1.294101e+09 | 1800.000000 | 1.0 | 776.000000 | 0.142857 | 1800.000000 | 5.000000 |
# Slice to include only volumes with more than 1 footnote per page
many_footnotes_one_page = one_footnote_df[one_footnote_df['footnotes_present'] > 1]
many_footnotes_one_page['footnotes_present'].value_counts().plot(kind='pie',autopct='%.2f', title="Percentage of footnoted pages in one-footnote subset with more than one footnote", figsize=(16,8))\
.set_ylabel("Categories of foonoted pages (from 2 to 5 footnotes)")
Text(0, 0.5, 'Categories of foonoted pages (from 2 to 5 footnotes)')
ax = many_footnotes_one_page['footnotes_present'].value_counts().plot(kind='bar', title="Number of volumes in one-footnote subset with more than one footnote", figsize=(16,8))
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Footnotes per page")
ax.set_ylabel("Number of volumes")
Text(0, 0.5, 'Number of volumes')
print(one_footnote_df['Title'])
0 Prince Arthur: an allegorical romance. The sto...
1 Prince Arthur: an allegorical romance. The sto...
2 Ismene and Ismenias, a novel translated from t...
3 Scelta di novelle di Giovanni Boccaccio, fatta...
4 The decameron, or ten days entertainment of Bo...
...
1479 The tatler. By the Right Honourable Joseph Add...
1480 Olivia; or, deserted bride. By the author of H...
1481 The fables of Ph<c3><a6>drus in Latin and Engl...
1482 The observer: being a collection of moral, lit...
1483 The guardian. In two volumes. To which is adde...
Name: Title, Length: 1484, dtype: object
miscategorized_pages = one_footnote_df[one_footnote_df['footnotes_present'] == 0]
miscategorized_pages.to_csv('miscategorized_pages_in_one_footnote_dataset.csv', encoding='utf-8', index=False)
miscategorized_pages['Title'].value_counts()[:50]
The adventures of Peregrine Pickle. In which are included, Memoirs of a lady of quality. In four volumes. ... 7 The rambler. In four volumes. ... 7 The spectator. ... 5 Cassandra, a romance. In five parts. Written originally in French, and faithfully translated into English, by Sir Charles Cotterell, Master of the Ceremonies to King Charles I. and King Charles II. ... 5 The lucubrations of Isaac Bickerstaff, Esq; |... 4 Clarissa, or, the history of a young lady: comprehending the most important concerns of private life. In eight volumes. By Mr. Samuel Richardson, ... 4 The fool of quality; or, the history of Henry Earl of Moreland. In four volumes. ... By Mr. Brooke. 4 The adventures of Roderick Random. In two volumes. ... 4 The devil upon two sticks in England: being a continuation of Le diable boiteux of Le Sage. 4 The adventures of Peregrine Pickle. In which are included, Memoirs of a lady of quality. By Dr. T. Smollet [sic]. 3 Cecilia: or memoirs of an heiress. By Miss Burney. Author of Evelina. In three volumes. ... 3 The spectator. ... . Carefully corrected. 3 The packet: a novel. By Miss Gunning. ... 3 The history of Eliza Warwick. In two volumes. ... 3 Cecilia, or memoirs of an heiress. By the author of Evelina. 3 The rambler. ... 3 A simple story. In four volumes. By Mrs. Inchbald. 3 A collection of novels and tales of the fairies. Written by that celebrated wit of France, the Countess d'Anois. In three volumes. ... 3 Ethelinde, or the recluse of the lake. By Charlotte Smith. In five volumes. ... 2 The involuntary inconstant; or, the history of Miss Francfort. A novel. In two volumes. ... . By the editor of The fatal compliance. 2 The censor. ... 2 Memoirs of a Scots heiress. Addressed to the Right Honourable Lady Catherine ******. By the author of Constance, &c. ... 2 Evelina; or, a young lady's entrance into the world. In two volumes. ... 2 The history of Jack Connor. In two volumes. 2 Benedicta. A novel. In two volumes. In two volumes. [sic] In two volumes. ... 2 The death of Abel. In five books. Attempted from the German of Mr. Gessner. 2 Amelia. By Henry Fielding, Esq; In four volumes. ... 2 The sailor boy. A novel. In two volumes. ... 2 Edward. A novel. In two volumes. Dedicated (by permission) to Her Majesty. ... 2 Rosina: a novel. In five volumes. By the author of Delia, an interesting tale, in Four Volumes. ... 2 Memoirs of the ancient house of Clarendon. A novel. I left no Business for this idle Trade. In three volumes. ... 2 The spectator. Volume the First. 2 Histoire de Gil Blas de Santillane. Par M. Le Sage. 2 Canterbury tales or the year 1797. By Harriet Lee. 2 Recueil de romans historiques. ... 2 Memoirs of Lady Woodford. Written by herself, and addressed to a friend. In two volumes. ... 2 The Church of Saint Siffrid. In two volumes. By the author of Ned Evans. ... 2 The rambler. By Samuel Johnson. In four volumes. Embellished with elegant frontispieces. ... 2 Love at first sight: or the history of Miss Caroline Stanhope. In three volumes. ... 2 The adventures of Telemachus, the son of Ulysses. Translated from the French of Messire Fran<c3><a7>ois Salignac de la Mothe-F<c3><a9>nelon, Archbishop Of Cambray. By T. Smollett, M. D. ... 2 Camilla: or, a picture of youth. By the author of Evelina and Cecilia. In three volumes. ... 2 Caroline of Lichtfield; a novel. Translated from the French. By Thomas Holcroft. 2 Laurentia. A novel. In two volumes. ... 2 Familiar letters between the principal characters in David Simple, and some others. To which is added, A vision. By the author of David Simple. In two volumes. ... 2 The history of Miss Betsy Thoughtless. In four volumes. ... 2 The beggar girl and her benefactors. In seven volumes. By Mrs. Bennett, Author Of Welch Heiress, Yuitnile Indiscretions, Agnes DI-Courci, And Fllen Countess Of Castle Howell. ... 2 The invisibe [sic] spy. By Explorabilis. In two volumes. ... 2 The history of Emily Montague. By the author of Lady Julia Mandeville. ... 2 La belle assembl<c3><a9>e: being a curious collection of some very remarkable incidents which happened to persons of the first quality in France. Interspers'd with Entertaining and Improving Observations made by them on several Passages in History, both Ancient and Modern. Written in French for the Entertainment of the King, and dedicated to him by Madam de Gomez. In four volumes. Adorn'd with copper-plates. 2 Letters of the Right Honourable Lady My W---y M----e: written, during her travels in Europe, Asia and Africa, to persons of distinction, men of letters, &c. in different parts of Europe. Which contain, among other curious relations, accounts of the policy and manners of the Turks; Drawn from Sources that have been inaccessible to other Travellers. In three volumes. 2 Name: Title, dtype: int64
miscategorized_pages['Author'].value_counts()[:20]
Anon 162 Smollett, Tobias George 29 Defoe, Daniel 28 Johnson, Samuel 22 Burney, Fanny 18 Haywood, Eliza Fowler 13 F<c3><a9>nelon, Fran<c3><a7>ois de Salignac de La Mothe- 11 Sterne, Laurence 10 Lady 10 Richardson, Samuel 10 Reeve, Clara 9 Bennett, Mrs. (Agnes Maria) 8 Steele, Richard, Sir 8 Fielding, Sarah 8 Brooke, Henry 8 Fielding, Henry 7 Smith, Charlotte Turner 7 Swift, Jonathan 7 Combe, William 6 Goldsmith, Oliver 6 Name: Author, dtype: int64
miscategorized_pages_in_one_fn_set = pd.read_csv('miscategorized_pages_in_one_footnote_dataset.csv', encoding='utf-8')
one_footnote_miscategorized_date_dist = miscategorized_pages_in_one_fn_set['Date'].value_counts()
one_footnote_date_dist = one_footnote_df['Date'].value_counts()
genre_term = "memoir|Memoir"
dataframe = one_footnote_df
corpus_name = "One-Footnote Corpus"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_one_fn_memoirs = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the One-Footnote Corpus that contain the words 'memoir|Memoir': 97 Percent of volumes in the One-Footnote Corpus that contain the words 'memoir|Memoir': 0.06536388140161725
genre_term = "letter|Letter|lettre|Lettre"
dataframe = one_footnote_df
corpus_name = "One-Footnote Corpus"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_one_fn_letters = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the One-Footnote Corpus that contain the words 'letter|Letter|lettre|Lettre': 186 Percent of volumes in the One-Footnote Corpus that contain the words 'letter|Letter|lettre|Lettre': 0.12533692722371967
genre_term = "tale|Tale|recit|Recit"
dataframe = one_footnote_df
corpus_name = "One-Footnote Corpus"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_one_fn_tale = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the One-Footnote Corpus that contain the words 'tale|Tale|recit|Recit': 119 Percent of volumes in the One-Footnote Corpus that contain the words 'tale|Tale|recit|Recit': 0.08018867924528301
genre_term = "novel|Novel|roman|Roman"
dataframe = one_footnote_df
corpus_name = "One-Footnote Corpus"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_one_fn_novel =dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the One-Footnote Corpus that contain the words 'novel|Novel|roman|Roman': 242 Percent of volumes in the One-Footnote Corpus that contain the words 'novel|Novel|roman|Roman': 0.16307277628032346
genre_term = "romance|Romance"
dataframe = one_footnote_df
corpus_name = "One-Footnote Corpus"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_one_fn_romance = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the One-Footnote Corpus that contain the words 'romance|Romance': 40 Percent of volumes in the One-Footnote Corpus that contain the words 'romance|Romance': 0.026954177897574125
genre_term = "history|History|histories|Histories|histoire|Histoire"
dataframe = one_footnote_df
corpus_name = "One-Footnote Corpus"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_one_fn_history = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the One-Footnote Corpus that contain the words 'history|History|histories|Histories|histoire|Histoire': 290 Percent of volumes in the One-Footnote Corpus that contain the words 'history|History|histories|Histories|histoire|Histoire': 0.1954177897574124
genre_term = "adventure|Adventures"
dataframe = one_footnote_df
corpus_name = "One-Footnote Corpus"
print(f"Number of volumes in the {corpus_name} whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} whose titles contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_one_fn_adventure = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the One-Footnote Corpus whose titles contain the words 'adventure|Adventures': 183 Percent of volumes in the One-Footnote Corpus whose titles contain the words 'adventure|Adventures': 0.12331536388140162
# Proportions of Memoirs, Letters, Tales
proportions_one_fn = [proportion_one_fn_memoirs, proportion_one_fn_letters, proportion_one_fn_tale]
proportions_one_fn
[0.06536388140161725, 0.12533692722371967, 0.08018867924528301]
proportions_one_fn_full = [proportion_one_fn_memoirs, proportion_one_fn_letters, proportion_one_fn_tale, \
proportion_one_fn_novel, proportion_one_fn_romance, proportion_one_fn_history, \
proportion_one_fn_adventure]
genre_titles = ["Memoirs", "Letters", "Tales", "Novel", "Romance", "History", "Adventure"]
proportions_one_fn_full = pd.DataFrame(zip(proportions_one_fn_full, genre_titles), columns = ['Proportion in Corpus', 'Genre'])
proportions_one_fn_full
| Proportion in Corpus | Genre | |
|---|---|---|
| 0 | 0.065364 | Memoirs |
| 1 | 0.125337 | Letters |
| 2 | 0.080189 | Tales |
| 3 | 0.163073 | Novel |
| 4 | 0.026954 | Romance |
| 5 | 0.195418 | History |
| 6 | 0.123315 | Adventure |
print("ECCO Corpus")
proportions_ECCO_full
ECCO Corpus
| Proportion in Corpus | Genre | |
|---|---|---|
| 0 | 0.055891 | Memoirs |
| 1 | 0.132931 | Letters |
| 2 | 0.077039 | Tales |
| 3 | 0.121543 | Novel |
| 4 | 0.018243 | Romance |
| 5 | 0.189635 | History |
| 6 | 0.113409 | Adventure |
print("One Footnote Corpus")
proportions_one_fn_full
One Footnote Corpus
| Proportion in Corpus | Genre | |
|---|---|---|
| 0 | 0.065364 | Memoirs |
| 1 | 0.125337 | Letters |
| 2 | 0.080189 | Tales |
| 3 | 0.163073 | Novel |
| 4 | 0.026954 | Romance |
| 5 | 0.195418 | History |
| 6 | 0.123315 | Adventure |
# Determine correlation coefficent (the strength of the similarity/association between the two and the p-value)
pearson_test = stats.pearsonr(proportions_ECCO_full['Proportion in Corpus'], proportions_one_fn_full['Proportion in Corpus'])
pearson_test
PearsonRResult(statistic=0.9656445215548639, pvalue=0.00041248063606768947)
# Test to deterime if distribitutions are statistically signifcantly different
# Null hypothesis assumes the distributions are the same, so p value of under 0.05 would indicate a difference in distribtution
stats.ks_2samp(proportions_ECCO_full['Proportion in Corpus'], proportions_one_fn_full['Proportion in Corpus'])
KstestResult(statistic=0.2857142857142857, pvalue=0.9627039627039629, statistic_location=0.12154310945851732, statistic_sign=1)
genre_term = "memoir|Memoir"
dataframe = one_footnote_df[one_footnote_df['footnotes_present'] != 0]
corpus_name = "One-Footnote Corpus, with footnotes)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_one_fn_footnoted_memoirs = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'memoir|Memoir': 30 Percent of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'memoir|Memoir': 0.04854368932038835
genre_term = "letter|Letter|lettre|Lettre"
dataframe = one_footnote_df[one_footnote_df['footnotes_present'] != 0]
corpus_name = "One-Footnote Corpus, with footnotes)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_one_fn_footnoted_letters = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'letter|Letter|lettre|Lettre': 84 Percent of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'letter|Letter|lettre|Lettre': 0.13592233009708737
genre_term = "novel|Novel|roman|Roman"
dataframe = one_footnote_df[one_footnote_df['footnotes_present'] != 0]
corpus_name = "One-Footnote Corpus, with footnotes)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'novel|Novel|roman|Roman': 66 Percent of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'novel|Novel|roman|Roman': 0.10679611650485436
genre_term = "tale|Tale|recit|Recit"
dataframe = one_footnote_df[one_footnote_df['footnotes_present'] != 0]
corpus_name = "One-Footnote Corpus, with footnotes)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_one_fn_footnoted_tale = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'tale|Tale|recit|Recit': 54 Percent of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'tale|Tale|recit|Recit': 0.08737864077669903
genre_term = "romance|Romance"
dataframe = one_footnote_df[one_footnote_df['footnotes_present'] != 0]
corpus_name = "One-Footnote Corpus, with footnotes)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'romance|Romance': 18 Percent of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'romance|Romance': 0.02912621359223301
genre_term = "history|History|histories|Histories|histoire|Histoire"
dataframe = one_footnote_df[one_footnote_df['footnotes_present'] != 0]
corpus_name = "One-Footnote Corpus, with footnotes)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'history|History|histories|Histories|histoire|Histoire': 127 Percent of volumes in the One-Footnote Corpus, with footnotes) that contain the words 'history|History|histories|Histories|histoire|Histoire': 0.20550161812297735
# Proportions of Memoirs, Letters, Tales
proportions_one_fn_footnoted = [proportion_one_fn_footnoted_memoirs, proportion_one_fn_footnoted_letters, proportion_one_fn_footnoted_tale]
proportions_one_fn_footnoted
[0.04854368932038835, 0.13592233009708737, 0.08737864077669903]
# Calcuate the proportion of footnotes in the onefootnotes
proportion_of_miscategorized_footnotes_by_date = one_footnote_miscategorized_date_dist / one_footnote_date_dist
proportion_of_miscategorized_footnotes_by_date = proportion_of_miscategorized_footnotes_by_date.reset_index()
proportion_of_miscategorized_footnotes_by_date.rename(columns = {'index':'Date', 'Date':'Proportion miscategroized'}, inplace=True)
proportion_of_miscategorized_footnotes_by_date
| Date | Proportion miscategroized | |
|---|---|---|
| 0 | 1697 | 1.000000 |
| 1 | 1701 | 0.500000 |
| 2 | 1702 | 1.000000 |
| 3 | 1703 | 1.000000 |
| 4 | 1704 | 1.000000 |
| ... | ... | ... |
| 95 | 1796 | 0.588235 |
| 96 | 1797 | 0.707317 |
| 97 | 1798 | 0.600000 |
| 98 | 1799 | 0.702128 |
| 99 | 1800 | 0.661290 |
100 rows × 2 columns
## Test if there's a correlation between the number of miscategorized footnotes and the date
## If the number is close to 1 (between 1 and 0.8), we would expect to see a strong positive correlation, ie
# the later decades are more often misclassified. If the nubmer is between 0.6 - 0.8, moderate correation
# If the number is between -1 and -0.8, that would mean a strong negative correlation.
proportion_of_miscategorized_footnotes_by_date.corr()
| Date | Proportion miscategroized | |
|---|---|---|
| Date | 1.000000 | -0.265123 |
| Proportion miscategroized | -0.265123 | 1.000000 |
If the model performed better, we would expect there to be a strong negative correlation (ie, as the date goes up, the proporition of miscategorized footnotes goes down). In the case above, a Pearson's value of -0.265 is under -0.6, which tells us there's no correlation between the date and the
one_footnote_miscategorized_decade_dist = miscategorized_pages_in_one_fn_set['decade'].value_counts()
one_footnote_miscategorized_decade_dist
1790 248 1780 148 1770 126 1760 101 1750 76 1800 41 1740 34 1720 30 1710 22 1730 21 1700 18 1690 1 Name: decade, dtype: int64
one_footnote_decade_dist = one_footnote_df['decade'].value_counts()
one_footnote_decade_dist
1790 396 1780 255 1770 236 1760 184 1750 121 1740 75 1800 62 1720 54 1730 44 1710 36 1700 20 1690 1 Name: decade, dtype: int64
# Calcuate the proportion of footnotes in the onefootnotes
proportion_of_miscategorized_footnotes_by_decade = one_footnote_miscategorized_decade_dist / one_footnote_decade_dist
proportion_of_miscategorized_footnotes_by_decade = proportion_of_miscategorized_footnotes_by_decade.reset_index()
proportion_of_miscategorized_footnotes_by_decade.rename(columns = {'index':'decade', 'decade':'Proportion miscategroized'}, inplace=True)
proportion_of_miscategorized_footnotes_by_decade
| decade | Proportion miscategroized | |
|---|---|---|
| 0 | 1690 | 1.000000 |
| 1 | 1700 | 0.900000 |
| 2 | 1710 | 0.611111 |
| 3 | 1720 | 0.555556 |
| 4 | 1730 | 0.477273 |
| 5 | 1740 | 0.453333 |
| 6 | 1750 | 0.628099 |
| 7 | 1760 | 0.548913 |
| 8 | 1770 | 0.533898 |
| 9 | 1780 | 0.580392 |
| 10 | 1790 | 0.626263 |
| 11 | 1800 | 0.661290 |
proportion_of_miscategorized_footnotes_by_decade.corr()
| decade | Proportion miscategroized | |
|---|---|---|
| decade | 1.00000 | -0.47584 |
| Proportion miscategroized | -0.47584 | 1.00000 |
#one_footnote_decade_dist
footnotes_df.groupby('decade')['decade'].count()
decade 1690 1 1700 149 1710 245 1720 380 1730 325 1740 445 1750 721 1760 1018 1770 1182 1780 1611 1790 2138 1800 391 Name: decade, dtype: int64
print("Number of volumes per decade, ECCO Corpus")
vols_count_footnotes_df = footnotes_df['decade'].value_counts().sort_index()
#vols_count_footnotes_df
Number of volumes per decade, ECCO Corpus
vols_count_footnotes_df = vols_count_footnotes_df.reset_index()
vols_count_footnotes_df.rename(columns={'decade': 'Number_of_Total_Volumes', 'index': 'decade'}, inplace=True)
vols_count_footnotes_df
| decade | Number_of_Total_Volumes | |
|---|---|---|
| 0 | 1690 | 1 |
| 1 | 1700 | 149 |
| 2 | 1710 | 245 |
| 3 | 1720 | 380 |
| 4 | 1730 | 325 |
| 5 | 1740 | 445 |
| 6 | 1750 | 721 |
| 7 | 1760 | 1018 |
| 8 | 1770 | 1182 |
| 9 | 1780 | 1611 |
| 10 | 1790 | 2138 |
| 11 | 1800 | 391 |
print("Number of volumes per decade, one-footnnote corpus")
vols_count_one_footnote_df = one_footnote_decade_dist.sort_index()
#vols_count_one_footnote_df
Number of volumes per decade, one-footnnote corpus
vols_count_one_footnote_df = vols_count_one_footnote_df.reset_index()
vols_count_one_footnote_df.rename(columns={'decade': 'Number_of_Volumes_One_footnote_Corpus', 'index': 'decade'}, inplace=True)
vols_count_one_footnote_df
| decade | Number_of_Volumes_One_footnote_Corpus | |
|---|---|---|
| 0 | 1690 | 1 |
| 1 | 1700 | 20 |
| 2 | 1710 | 36 |
| 3 | 1720 | 54 |
| 4 | 1730 | 44 |
| 5 | 1740 | 75 |
| 6 | 1750 | 121 |
| 7 | 1760 | 184 |
| 8 | 1770 | 236 |
| 9 | 1780 | 255 |
| 10 | 1790 | 396 |
| 11 | 1800 | 62 |
vols_count_footnotes_df['One_footnote_Corpus_Volumes'] = vols_count_one_footnote_df['Number_of_Volumes_One_footnote_Corpus']
vols_count_footnotes_df
| decade | Number_of_Total_Volumes | One_footnote_Corpus_Volumes | |
|---|---|---|---|
| 0 | 1690 | 1 | 1 |
| 1 | 1700 | 149 | 20 |
| 2 | 1710 | 245 | 36 |
| 3 | 1720 | 380 | 54 |
| 4 | 1730 | 325 | 44 |
| 5 | 1740 | 445 | 75 |
| 6 | 1750 | 721 | 121 |
| 7 | 1760 | 1018 | 184 |
| 8 | 1770 | 1182 | 236 |
| 9 | 1780 | 1611 | 255 |
| 10 | 1790 | 2138 | 396 |
| 11 | 1800 | 391 | 62 |
vols_count_footnotes_df['one_footnote_proportion'] = vols_count_footnotes_df['One_footnote_Corpus_Volumes'] / vols_count_footnotes_df['Number_of_Total_Volumes']
vols_count_footnotes_df['one_footnote_proportion'][1:].mean()
0.1615910158156099
vols_count_footnotes_df['one_footnote_proportion']
0 1.000000 1 0.134228 2 0.146939 3 0.142105 4 0.135385 5 0.168539 6 0.167822 7 0.180747 8 0.199662 9 0.158287 10 0.185220 11 0.158568 Name: one_footnote_proportion, dtype: float64
vols_count_footnotes_df['One_footnote_Corpus_Volumes']
0 1 1 20 2 36 3 54 4 44 5 75 6 121 7 184 8 236 9 255 10 396 11 62 Name: One_footnote_Corpus_Volumes, dtype: int64
stats.ks_2samp(vols_count_footnotes_df['One_footnote_Corpus_Volumes']*(vols_count_footnotes_df['one_footnote_proportion'][1:].mean()), vols_count_footnotes_df['One_footnote_Corpus_Volumes'][1:])
KstestResult(statistic=0.7348484848484849, pvalue=0.002215848493947834, statistic_location=41.20570903298052, statistic_sign=1)
vols_count_footnotes_df.plot.scatter('decade', 'one_footnote_proportion', title="Volumes in one-footnote corpus, as proportion of larger coprus, by decade")
<Axes: title={'center': 'Volumes in one-footnote corpus, as proportion of larger coprus, by decade'}, xlabel='decade', ylabel='one_footnote_proportion'>
result = stats.linregress(vols_count_footnotes_df['decade'][1:], vols_count_footnotes_df['one_footnote_proportion'][1:])
print(result)
print(f"The slope of the line measuring proportion of footnotes/decade is:{result.slope}")
LinregressResult(slope=0.00042193444687323124, intercept=-0.5767942662125448, rvalue=0.6588285658948222, pvalue=0.027480872398679062, stderr=0.0001605973556495961, intercept_stderr=0.2810912536004507) The slope of the line measuring proportion of footnotes/decade is:0.00042193444687323124
# Let's perform Pearson's test for correlation between number of volumes in one footnote corpus and total corpus
# We assume the null hypthoesis: there is no association between the variables
rho, p = pearsonr(vols_count_footnotes_df['One_footnote_Corpus_Volumes'],vols_count_footnotes_df['Number_of_Total_Volumes'])
#print Pearson rank correlation and p-value
print("CORRELATION BETWEEN NUMBER OF VOLUMES (TOTAL) AND NUMBER OF ONE-FOOTNOTE VOLUMES")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
CORRELATION BETWEEN NUMBER OF VOLUMES (TOTAL) AND NUMBER OF ONE-FOOTNOTE VOLUMES Pearson rank correlation: 0.993118033900296 p-value: 1.2017810023725311e-10
This data subset represents all volumes in ECCOFictionTXTLab.csv that contain one page with footnotes predicted. The individual page images for each of these pages was subsequently human-verified to validate the footnote prediction and count the number of footnotes present.
This subset is created from the ECCO-Footnote-Manifest-volumes-with-one-footnote.csv, using the data from two sets of researcher data, both updated as of April 9, 2021. We have concatenated the two sets here.
fn_pages: number of pages with footnotes predicted by the model's four machine learning and classification algorithms
nofn_pages: number of pages with no footnotes predicted by the model's four machine learning and classification algorithms
TableName: Name of metadata table
fn.percent: percentage of pages with footnotes predicted
footnotes_present: number of footnotes present (human-verified)
tags: controlled vocabular of tags describing the type of footnote or non-footnote typoraphic features
# Create a new dataframe with the annotated footnotes
one_footnote_tagged_df = pd.read_csv('Human-Verified-ECCO-Footnote-Manifest-volumes-with-one-footnote-with-tags.csv', encoding='utf-8', parse_dates=True)
one_footnote_tagged_df.head()
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | fn_pages | nofn_pages | TableName | fn.percent | decade | footnotes_present | tags | Unnamed: 17 | Unnamed: 18 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | C, IT | NaN | NaN |
| 1 | 5099 | 0000100702.xml | 100702 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 2 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 261 | Manifest_LitAndLang2 | 0.003817 | 1770 | 0 | CW | NaN | NaN |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | BQ, P, TY, TE | NaN | NaN |
| 3 | 450 | 0000200500.xml | 200500 | T128705 | 1791 | Scelta di novelle di Giovanni Boccaccio, fatta... | 0 | Boccaccio, Giovanni | In Londra : presso Francesco Wingrave, success... | Italian fiction, 18th century | 1 | 306 | Manifest_LitAndLang1 | 0.003257 | 1790 | 0 | SE | NaN | NaN |
| 4 | 453 | 0000200700.xml | 200700 | T128696 | 1741 | The decameron, or ten days entertainment of Bo... | 0 | Boccaccio, Giovanni | London : printed for R. Dodsley, at Tully's He... | Wood-engraving, English, Specimens, 17th century | 1 | 600 | Manifest_LitAndLang1 | 0.001664 | 1740 | 2 | NaN | NaN | NaN |
one_footnote_tagged_df['footnotes_present'].sum()
675
Terminology
TP (True Positives): known
TN (True Negatives): unknown
FP (False Posiives): known
FN (False Negatives): unknown
Because we don't have the TIFF files for pages that the machine learning and classification algorithms predicted to contain no footnotes, we can't compute false negative and true metrics. This also means so we can't comput the full classification accuracy, recall, or specificity
(Classification accuracy: percentage of correct predictions in the one footnote subset, or overall, how often is the classifier correct?
classification_accuracy = (TP + TN) / float(TP + TN + FP + FN)))Recall (or "sensitivity"): When the actual value is positive, how often is the prediction correct?
recall = TP / float(FN + TP)
But what we do have is the number of false positives for the subset.
Precision: When a positive value is predicted, how often is the prediction correct?
precision = TP / float(TP + FP)
#Calculate the number of true positives
TP = one_footnote_tagged_df[one_footnote_tagged_df['footnotes_present'] != 0]['DocumentID'].count()
print("True Positives:", TP)
True Positives: 624
#Calculate the number of false positives
FP = one_footnote_tagged_df[one_footnote_tagged_df['footnotes_present'] == 0]['DocumentID'].count()
print("False Positives:", FP)
False Positives: 860
Let's calculate the precision score
#Calculate the precision score
precision = TP / float(TP + FP)
print("Precision score:", precision)
print("Precision score:", precision * 100, "%")
Precision score: 0.42048517520215634 Precision score: 42.04851752021563 %
# splitting multivalue cells into rows by delimiter ','
tags_list = one_footnote_tagged_df.tags.str.split(',')
one_footnote_tagged_df.tags = tags_list
one_footnote_tagged_mr_df = one_footnote_tagged_df.explode('tags')
one_footnote_tagged_mr_df.head(10)
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | fn_pages | nofn_pages | TableName | fn.percent | decade | footnotes_present | tags | Unnamed: 17 | Unnamed: 18 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | C | NaN | NaN |
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | IT | NaN | NaN |
| 1 | 5099 | 0000100702.xml | 100702 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 2 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 261 | Manifest_LitAndLang2 | 0.003817 | 1770 | 0 | CW | NaN | NaN |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | BQ | NaN | NaN |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | P | NaN | NaN |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | TY | NaN | NaN |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | TE | NaN | NaN |
| 3 | 450 | 0000200500.xml | 200500 | T128705 | 1791 | Scelta di novelle di Giovanni Boccaccio, fatta... | 0 | Boccaccio, Giovanni | In Londra : presso Francesco Wingrave, success... | Italian fiction, 18th century | 1 | 306 | Manifest_LitAndLang1 | 0.003257 | 1790 | 0 | SE | NaN | NaN |
| 4 | 453 | 0000200700.xml | 200700 | T128696 | 1741 | The decameron, or ten days entertainment of Bo... | 0 | Boccaccio, Giovanni | London : printed for R. Dodsley, at Tully's He... | Wood-engraving, English, Specimens, 17th century | 1 | 600 | Manifest_LitAndLang1 | 0.001664 | 1740 | 2 | NaN | NaN | NaN |
| 5 | 458 | 0000300202.xml | 300202 | T102664 | 1789 | Zelia in the desert. From the French. By the l... | Volume 2 | Daubenton, Mme. (Marguerite) | London : printed for G. and T. Wilkie, No 71, ... | Voyages, Imaginary, Early works to 1800 | 1 | 257 | Manifest_LitAndLang1 | 0.003876 | 1780 | 0 | CW | NaN | NaN |
one_footnote_tagged_mr_df['tags'].dtype
dtype('O')
# splitting multi-valued cells by delimiter '/'
tags_list = one_footnote_tagged_mr_df.tags.str.split('/')
one_footnote_tagged_mr_df.tags = tags_list
one_footnote_tagged_MR_df = one_footnote_tagged_mr_df.explode('tags')
one_footnote_tagged_MR_df
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | fn_pages | nofn_pages | TableName | fn.percent | decade | footnotes_present | tags | Unnamed: 17 | Unnamed: 18 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | C | NaN | NaN |
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | IT | NaN | NaN |
| 1 | 5099 | 0000100702.xml | 100702 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 2 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 261 | Manifest_LitAndLang2 | 0.003817 | 1770 | 0 | CW | NaN | NaN |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | BQ | NaN | NaN |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | P | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1482 | 8387 | 1294000101.xml | 1294000101 | N061349 | 1787 | The observer: being a collection of moral, lit... | Volume 1 | Cumberland, Richard | London : printed for C. Dilly in the Poultry, ... | Conduct of life, Early works to 1900, Greek li... | 1 | 303 | Manifest_LitAndLang2 | 0.003289 | 1780 | 0 | LB | NaN | NaN |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | LB | NaN | NaN |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | P | NaN | NaN |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | TY | NaN | NaN |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | CW | NaN | NaN |
3068 rows × 19 columns
# Removing whitespaces
one_footnote_tagged_MR_df.tags = one_footnote_tagged_MR_df.tags.str.replace(' ', '')
one_footnote_tagged_MR_df['tags'].value_counts()
CW 600 TY 411 C 235 FN 226 SM 203 P 175 PB 167 BQ 161 IT 122 LB 118 T 93 O 90 TE 87 IN 82 SE 65 MT 50 LS 44 PO 41 RQ 22 D 12 M 10 PE 4 SD 3 I 2 P.LB 1 TY.CW 1 CW.TY 1 RQ.CW 1 BP 1 LP 1 NY 1 LD 1 CH 1 TE.LS 1 Name: tags, dtype: int64
one_footnote_tagged_MR_df
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | fn_pages | nofn_pages | TableName | fn.percent | decade | footnotes_present | tags | Unnamed: 17 | Unnamed: 18 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | C | NaN | NaN |
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | IT | NaN | NaN |
| 1 | 5099 | 0000100702.xml | 100702 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 2 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 261 | Manifest_LitAndLang2 | 0.003817 | 1770 | 0 | CW | NaN | NaN |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | BQ | NaN | NaN |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | P | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1482 | 8387 | 1294000101.xml | 1294000101 | N061349 | 1787 | The observer: being a collection of moral, lit... | Volume 1 | Cumberland, Richard | London : printed for C. Dilly in the Poultry, ... | Conduct of life, Early works to 1900, Greek li... | 1 | 303 | Manifest_LitAndLang2 | 0.003289 | 1780 | 0 | LB | NaN | NaN |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | LB | NaN | NaN |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | P | NaN | NaN |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | TY | NaN | NaN |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | CW | NaN | NaN |
3068 rows × 19 columns
# Adding a new column called "tag" concatenating the results from "tags" and
# "Unanamed: 17" the secondary tag column. We'll split these up later
one_footnote_tagged_MR_df['tag'] = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df.columns[16:]].apply(
lambda x: '/'.join(x.dropna().astype(str)),
axis=1
)
one_footnote_tagged_MR_df
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | fn_pages | nofn_pages | TableName | fn.percent | decade | footnotes_present | tags | Unnamed: 17 | Unnamed: 18 | tag | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | C | NaN | NaN | C |
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | IT | NaN | NaN | IT |
| 1 | 5099 | 0000100702.xml | 100702 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 2 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 261 | Manifest_LitAndLang2 | 0.003817 | 1770 | 0 | CW | NaN | NaN | CW |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | BQ | NaN | NaN | BQ |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | P | NaN | NaN | P |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1482 | 8387 | 1294000101.xml | 1294000101 | N061349 | 1787 | The observer: being a collection of moral, lit... | Volume 1 | Cumberland, Richard | London : printed for C. Dilly in the Poultry, ... | Conduct of life, Early works to 1900, Greek li... | 1 | 303 | Manifest_LitAndLang2 | 0.003289 | 1780 | 0 | LB | NaN | NaN | LB |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | LB | NaN | NaN | LB |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | P | NaN | NaN | P |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | TY | NaN | NaN | TY |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | CW | NaN | NaN | CW |
3068 rows × 20 columns
# Now, splitting multi-valued cells in "tag" by /
tag_list = one_footnote_tagged_MR_df.tag.str.split('/')
one_footnote_tagged_MR_df.tag = tag_list
one_footnote_tagged_MR_df = one_footnote_tagged_MR_df.explode('tag')
one_footnote_tagged_MR_df
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | fn_pages | nofn_pages | TableName | fn.percent | decade | footnotes_present | tags | Unnamed: 17 | Unnamed: 18 | tag | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | C | NaN | NaN | C |
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | IT | NaN | NaN | IT |
| 1 | 5099 | 0000100702.xml | 100702 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 2 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 261 | Manifest_LitAndLang2 | 0.003817 | 1770 | 0 | CW | NaN | NaN | CW |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | BQ | NaN | NaN | BQ |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | P | NaN | NaN | P |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1482 | 8387 | 1294000101.xml | 1294000101 | N061349 | 1787 | The observer: being a collection of moral, lit... | Volume 1 | Cumberland, Richard | London : printed for C. Dilly in the Poultry, ... | Conduct of life, Early works to 1900, Greek li... | 1 | 303 | Manifest_LitAndLang2 | 0.003289 | 1780 | 0 | LB | NaN | NaN | LB |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | LB | NaN | NaN | LB |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | P | NaN | NaN | P |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | TY | NaN | NaN | TY |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | CW | NaN | NaN | CW |
3354 rows × 20 columns
# Now, splitting multi-valued cells in "tag" by .
tag_list = one_footnote_tagged_MR_df.tag.str.split('.')
one_footnote_tagged_MR_df.tag = tag_list
one_footnote_tagged_MR_df = one_footnote_tagged_MR_df.explode('tag')
one_footnote_tagged_MR_df.tag
0 C
0 IT
1 CW
2 BQ
2 P
..
1482 LB
1483 LB
1483 P
1483 TY
1483 CW
Name: tag, Length: 3359, dtype: object
# Now, splitting multi-valued cells in "tag" by ,
tag_list = one_footnote_tagged_MR_df.tag.str.split(',')
one_footnote_tagged_MR_df.tag = tag_list
one_footnote_tagged_MR_df = one_footnote_tagged_MR_df.explode('tag')
one_footnote_tagged_MR_df.tag
0 C
0 IT
1 CW
2 BQ
2 P
..
1482 LB
1483 LB
1483 P
1483 TY
1483 CW
Name: tag, Length: 3385, dtype: object
# Removing whitespaces
one_footnote_tagged_MR_df.tag = one_footnote_tagged_MR_df.tag.str.replace(' ', '')
one_footnote_tagged_MR_df['tag'].value_counts()
CW 603
TY 413
C 367
FN 226
SM 203
IT 185
P 176
PB 167
BQ 161
LB 119
IN 116
T 109
O 90
TE 88
SE 65
NN 57
MT 55
LS 45
PO 41
39
RQ 23
D 12
M 10
PE 4
SD 3
I 2
NY 1
R 1
LD 1
BP 1
CH 1
LP 1
Name: tag, dtype: int64
# Create two new new column with a Boolean True/False for whether footnotes present
one_footnote_tagged_MR_df.loc[one_footnote_tagged_MR_df['footnotes_present'] > 0, 'fn_present_bool'] = 'True'
one_footnote_tagged_MR_df.loc[one_footnote_tagged_MR_df['footnotes_present'] == 0, 'fn_present_bool'] = 'False'
one_footnote_tagged_MR_df
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | ... | nofn_pages | TableName | fn.percent | decade | footnotes_present | tags | Unnamed: 17 | Unnamed: 18 | tag | fn_present_bool | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | ... | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | C | NaN | NaN | C | True |
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | ... | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | IT | NaN | NaN | IT | True |
| 1 | 5099 | 0000100702.xml | 100702 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 2 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | ... | 261 | Manifest_LitAndLang2 | 0.003817 | 1770 | 0 | CW | NaN | NaN | CW | False |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | ... | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | BQ | NaN | NaN | BQ | False |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | ... | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | P | NaN | NaN | P | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1482 | 8387 | 1294000101.xml | 1294000101 | N061349 | 1787 | The observer: being a collection of moral, lit... | Volume 1 | Cumberland, Richard | London : printed for C. Dilly in the Poultry, ... | Conduct of life, Early works to 1900, Greek li... | ... | 303 | Manifest_LitAndLang2 | 0.003289 | 1780 | 0 | LB | NaN | NaN | LB | False |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | ... | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | LB | NaN | NaN | LB | False |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | ... | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | P | NaN | NaN | P | False |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | ... | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | TY | NaN | NaN | TY | False |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | ... | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | CW | NaN | NaN | CW | False |
3385 rows × 21 columns
# Group tags using our boolean column, "fn_present"
one_footnote_tagged_MR_df.groupby(['tag','fn_present_bool'])['footnotes_present'].count()
tag fn_present_bool
False 12
True 27
BP False 1
BQ False 159
True 2
C False 8
True 359
CH False 1
CW False 593
True 10
D False 12
FN False 9
True 217
I False 2
IN False 6
True 110
IT False 4
True 181
LB False 118
True 1
LD False 1
LP False 1
LS False 45
M False 9
True 1
MT True 55
NN False 41
True 16
NY False 1
O False 87
True 3
P False 174
True 2
PB False 165
True 2
PE False 4
PO False 41
R False 1
RQ False 23
SD False 3
SE False 64
True 1
SM False 201
True 2
T False 2
True 107
TE False 88
TY False 408
True 5
Name: footnotes_present, dtype: int64
# Assign this groupby object to variable
tags_df = one_footnote_tagged_MR_df.groupby(['tag','fn_present_bool'])['footnotes_present'].count()
# Unstack so that we have two new columns for our boolean True, False
tags_unstacked_df = tags_df.unstack(level=-1)
print("Tag counts, sorted by whether or not footnotes present:")
tags_unstacked_df
Tag counts, sorted by whether or not footnotes present:
| fn_present_bool | False | True |
|---|---|---|
| tag | ||
| 12.0 | 27.0 | |
| BP | 1.0 | NaN |
| BQ | 159.0 | 2.0 |
| C | 8.0 | 359.0 |
| CH | 1.0 | NaN |
| CW | 593.0 | 10.0 |
| D | 12.0 | NaN |
| FN | 9.0 | 217.0 |
| I | 2.0 | NaN |
| IN | 6.0 | 110.0 |
| IT | 4.0 | 181.0 |
| LB | 118.0 | 1.0 |
| LD | 1.0 | NaN |
| LP | 1.0 | NaN |
| LS | 45.0 | NaN |
| M | 9.0 | 1.0 |
| MT | NaN | 55.0 |
| NN | 41.0 | 16.0 |
| NY | 1.0 | NaN |
| O | 87.0 | 3.0 |
| P | 174.0 | 2.0 |
| PB | 165.0 | 2.0 |
| PE | 4.0 | NaN |
| PO | 41.0 | NaN |
| R | 1.0 | NaN |
| RQ | 23.0 | NaN |
| SD | 3.0 | NaN |
| SE | 64.0 | 1.0 |
| SM | 201.0 | 2.0 |
| T | 2.0 | 107.0 |
| TE | 88.0 | NaN |
| TY | 408.0 | 5.0 |
Note that these counts are pages not footnotes. There are a few pages that have more than one footnote.
# Plot our tags
ax = tags_unstacked_df.plot(kind='bar', color=['tab:orange','tab:blue'], title="Number of footnoted and unfootnoted pages for each tag", figsize=(16,8), alpha=0.75, rot=0)
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
plt.ylabel("Number of pages")
Text(0, 0.5, 'Number of pages')
just_footnotes = one_footnote_tagged_MR_df.loc[((one_footnote_tagged_MR_df['tag'] == 'C') | (one_footnote_tagged_MR_df['tag'] == 'T') | (one_footnote_tagged_MR_df['tag'] == 'IT') | (one_footnote_tagged_MR_df['tag'] == 'IN') | (one_footnote_tagged_MR_df['tag'] == 'MT'))]
Note that the number of tags is larger than the number of footnotes (675 footnotes) because many footnotes have been tagged to belog to both category.
just_footnotes.groupby(['tag'])['footnotes_present'].value_counts()
tag footnotes_present
C 1 346
2 12
0 8
3 1
IN 1 110
0 6
IT 1 176
0 4
2 3
3 1
4 1
MT 1 55
T 1 105
0 2
2 2
Name: footnotes_present, dtype: int64
just_footnotes['footnotes_present'].sum()
836
# Group tags using our boolean column, "fn_present"
#just_footnotes = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['footnotes_present'] == 1]
ax =just_footnotes.groupby(['tag'])['footnotes_present'].sum().plot(kind='bar', title="Number of each type of footnote in one-footnote subset")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.05, p.get_height() * 1.05))
ax = just_footnotes.groupby(['tag'])['footnotes_present'].sum().plot(kind='barh', title="Number of each type of footnote in one-footnote subset")
for p in ax.patches:
ax.annotate(p.get_width(), (p.get_x() + p.get_width(), p.get_y()), xytext=(5, 10), textcoords='offset points')
# Group tags using our boolean column, "fn_present"
#just_footnotes = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['footnotes_present'] == 1]
ax =just_footnotes.groupby(['tag'])['footnotes_present'].sum().plot(kind='barh', title="Number of each type of footnote in one-footnote subset")
for p in ax.patches:
ax.annotate(p.get_width(), (p.get_x() + p.get_width(), p.get_y()), xytext=(2, 10), textcoords='offset points')
# Adding a new column called "tag" concatenating the results from "tags" and
# "Unanamed: 17" the secondary tag columns.
one_footnote_tagged_new_df = one_footnote_tagged_df
one_footnote_tagged_new_df['tag'] = one_footnote_tagged_df[one_footnote_tagged_df.columns[16:]].apply(
lambda x: ','.join(x.dropna().astype(str)),
axis=1
)
one_footnote_tagged_new_df
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | fn_pages | nofn_pages | TableName | fn.percent | decade | footnotes_present | tags | Unnamed: 17 | Unnamed: 18 | tag | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 5098 | 0000100701.xml | 100701 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 1 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 259 | Manifest_LitAndLang2 | 0.003846 | 1770 | 1 | [C, IT] | NaN | NaN | ['C', ' IT'] |
| 1 | 5099 | 0000100702.xml | 100702 | T134638 | 1779 | Prince Arthur: an allegorical romance. The sto... | Volume 2 | Bicknell, Alexander | London : printed for G. Riley; and sold by F. ... | Spenser, Edmund,, 1552?-1599, Adaptations | 1 | 261 | Manifest_LitAndLang2 | 0.003817 | 1770 | 0 | [CW] | NaN | NaN | ['CW'] |
| 2 | 5100 | 0000100800.xml | 100800 | T112791 | 1788 | Ismene and Ismenias, a novel translated from t... | 0 | Eustathius, Macrembolites | London [i.e. Paris] : et se trouve <c3><a0> Pa... | Eustathius,, Macrembolites,, 12th cent., De Is... | 1 | 211 | Manifest_LitAndLang2 | 0.004717 | 1780 | 0 | [BQ, P, TY, TE] | NaN | NaN | ['BQ', ' P', ' TY', ' TE'] |
| 3 | 450 | 0000200500.xml | 200500 | T128705 | 1791 | Scelta di novelle di Giovanni Boccaccio, fatta... | 0 | Boccaccio, Giovanni | In Londra : presso Francesco Wingrave, success... | Italian fiction, 18th century | 1 | 306 | Manifest_LitAndLang1 | 0.003257 | 1790 | 0 | [SE] | NaN | NaN | ['SE'] |
| 4 | 453 | 0000200700.xml | 200700 | T128696 | 1741 | The decameron, or ten days entertainment of Bo... | 0 | Boccaccio, Giovanni | London : printed for R. Dodsley, at Tully's He... | Wood-engraving, English, Specimens, 17th century | 1 | 600 | Manifest_LitAndLang1 | 0.001664 | 1740 | 2 | NaN | NaN | NaN | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1479 | 8371 | 1293500800.xml | 1293500800 | N063224 | 1754 | The tatler. By the Right Honourable Joseph Add... | 0 | Addison, Joseph | Glasgow : printed by Robert Urie, MDCCLIV. [17... | Short stories, English, Early works to 1800 | 1 | 262 | Manifest_LitAndLang2 | 0.003802 | 1750 | 0 | [LB, TY, SM] | NaN | NaN | ['LB', ' TY', ' SM'] |
| 1480 | 8379 | 1293700602.xml | 1293700602 | N069021 | 1787 | Olivia; or, deserted bride. By the author of H... | Volume 2 | Bonhote, Mrs. (Elizabeth) | Dublin : printed for Messrs. W. Watson, Gilber... | English fiction, 18th century | 1 | 273 | Manifest_LitAndLang2 | 0.003650 | 1780 | 0 | [LB, TY, CW, SM] | NaN | NaN | ['LB', ' TY', ' CW', ' SM'] |
| 1481 | 8385 | 1293900700.xml | 1293900700 | N031640 | 1761 | The fables of Ph<c3><a6>drus in Latin and Engl... | 0 | Phaedrus | Dublin : printed for John Exshaw, 1761. | Fables | 1 | 142 | Manifest_LitAndLang2 | 0.006993 | 1760 | 0 | [TY, CW] | NaN | NaN | ['TY', ' CW'] |
| 1482 | 8387 | 1294000101.xml | 1294000101 | N061349 | 1787 | The observer: being a collection of moral, lit... | Volume 1 | Cumberland, Richard | London : printed for C. Dilly in the Poultry, ... | Conduct of life, Early works to 1900, Greek li... | 1 | 303 | Manifest_LitAndLang2 | 0.003289 | 1780 | 0 | [RQ, P, CW, LB] | NaN | NaN | ['RQ', ' P', ' CW', ' LB'] |
| 1483 | 8396 | 1294100502.xml | 1294100502 | N017918 | 1752 | The guardian. In two volumes. To which is adde... | Volume 2 | Anon | Dublin : printed for Peter Wilson, in Dame-Str... | Great Britain, Politics and government, 1702-1... | 1 | 412 | Manifest_LitAndLang2 | 0.002421 | 1750 | 0 | [LB, P, TY, CW] | NaN | NaN | ['LB', ' P', ' TY', ' CW'] |
1484 rows × 20 columns
one_footnote_tagged_new_df['tag'].value_counts().to_csv('multi-tag-categories.csv')
## Exported to Open-Refine, where I used k-means clustering to match different sequences of tag combinations
## eg a footnote tagged CW, PB is the same as PB, CW
## Re-importing cleaned data below
multi_tagged_footnotes = pd.read_csv('multi-tag-categories-cleaned.csv', encoding='utf-8')
# Plot the combinations of tags
ax = multi_tagged_footnotes.groupby('Tag')['Count'].sum().sort_values(ascending=True)\
.plot(kind='barh', figsize=(20,40), title="How Many Times each Combination of Footnote Tags Appear Together")
for p in ax.patches:
ax.annotate(p.get_width(), (p.get_x() + p.get_width(), p.get_y()), xytext=(5, 10), textcoords='offset points')
multi_tagged_footnotes
| Count | Tag | |
|---|---|---|
| 0 | 103 | FN, C |
| 1 | 91 | CW |
| 2 | 82 | C |
| 3 | 64 | C, T |
| 4 | 51 | IT |
| ... | ... | ... |
| 404 | 1 | LB, CW |
| 405 | 1 | CW, PO |
| 406 | 1 | LB, TY, CW, SM |
| 407 | 1 | BQ, P, CW, TY, PB |
| 408 | 1 | C, IT, IN, MT |
409 rows × 2 columns
ax = multi_tagged_footnotes.groupby('Tag')['Count'].sum().sort_values(ascending=False).head(25).plot(kind='bar', title="Top 25 Tag Combinations")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
print("Number of total volumes in our one-footnote subset:")
one_footnote_tagged_df['footnotes_present'].count()
Number of total volumes in our one-footnote subset:
1484
print("Number of volumes in subset WITH footnotes:")
one_footnote_tagged_df[one_footnote_tagged_df['footnotes_present'] != 0]['tag'].count()
Number of volumes in subset WITH footnotes:
624
print("Number of novels (i.e. unique ESTC_C) in subset WITH footnotes:")
one_footnote_tagged_df[one_footnote_tagged_df['footnotes_present'] != 0]["ESTC_ID"].nunique()
Number of novels (i.e. unique ESTC_C) in subset WITH footnotes:
571
print("Number of volumes in subset WITHOUT footnotes:")
one_footnote_tagged_df[one_footnote_tagged_df['footnotes_present'] == 0]['tag'].count()
Number of volumes in subset WITHOUT footnotes:
860
print('Number of volumes in subset tagged "Not a Novel"')
one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'NN']['tag'].count() # note the difference n dataframe
Number of volumes in subset tagged "Not a Novel"
57
# Print out the number of times each tag appears in our dataset
print('Tags in our dataset, by count:')
one_footnote_tagged_MR_df['tag'].value_counts()
Tags in our dataset, by count:
CW 603
TY 413
C 367
FN 226
SM 203
IT 185
P 176
PB 167
BQ 161
LB 119
IN 116
T 109
O 90
TE 88
SE 65
NN 57
MT 55
LS 45
PO 41
39
RQ 23
D 12
M 10
PE 4
SD 3
I 2
NY 1
R 1
LD 1
BP 1
CH 1
LP 1
Name: tag, dtype: int64
We classified each footnote in the one-footnote corpus by function:
Context (C): footnotes that provide historical or cultural context (C)
Translation (T, which is a subset of, and thus also tagged as, C): footnotes that provide translations
Intertextual (IT): footnotes that make an intertextual reference to another work
Intratextual (IN): footnotes that make an intratextual reference to another point in the text or the narrative
Metatextual (MT) footnotes that offer a metatextual commentary on the narrative or print form of the fictional work
Note: We allowed ourselves give footnotes multiple tags if relevant.
Click on the links below for more in-depth descriptive statistics for each of the following footnote categories:
We also include links to the tags assigned to pages misclassified as having a footnote. Descriptive statistics on those pages can be found here:
# Let's look at the 'C' tag
contextual_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'C']
contextual_footnotes_df['footnotes_present'].value_counts()
1 346 2 12 0 8 3 1 Name: footnotes_present, dtype: int64
# Plot the number of C tagged footnotes in each year
ax = contextual_footnotes_df.groupby('Date')[['footnotes_present']].sum().plot(kind="bar", figsize=(15, 5), title="Contextual footnotes in our one-footnote dataset, by year")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Date")
ax.set_ylabel("Number of volumes")
plt.legend(["Footnotes with tag 'C'"])
<matplotlib.legend.Legend at 0x1475b9290>
# Plot the number of C tagged volumes in each year
ax = contextual_footnotes_df.groupby('Date')[['tag']].count().plot(kind="bar", figsize=(15, 5), title="Volumes with contextual footnotes in our one-footnote dataset, by year")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Date")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'C'"])
<matplotlib.legend.Legend at 0x1473985d0>
# Plot the nubmer of C tagged footnotes in each decade
ax = contextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().plot(kind="bar", figsize=(10, 5), title="Contextual footnotes in our one-footnote dataset, by decade")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'C'"])
<matplotlib.legend.Legend at 0x14768dbd0>
# Create a dataframe of all volumes (including those misclassified in one-footnote-dateset)
all_footnotes_by_decade = one_footnote_tagged_new_df.groupby('decade')[['tag']].count().reset_index()
#contextual_footnotes_by_decade = contextual_footnotes_df.groupby('decade')[['tag']].count().reset_index()
contextual_footnotes_by_decade = contextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().reset_index()
# Merge the two dataframes
merged_C = pd.merge_ordered(all_footnotes_by_decade,contextual_footnotes_by_decade, on='decade').fillna(0)
merged_C
| decade | tag | footnotes_present | |
|---|---|---|---|
| 0 | 1690 | 1 | 0.0 |
| 1 | 1700 | 20 | 2.0 |
| 2 | 1710 | 36 | 10.0 |
| 3 | 1720 | 54 | 13.0 |
| 4 | 1730 | 44 | 21.0 |
| 5 | 1740 | 75 | 18.0 |
| 6 | 1750 | 121 | 31.0 |
| 7 | 1760 | 184 | 53.0 |
| 8 | 1770 | 236 | 63.0 |
| 9 | 1780 | 255 | 53.0 |
| 10 | 1790 | 396 | 94.0 |
| 11 | 1800 | 62 | 15.0 |
#merged_C['volumes_with_footnotes/all_volumes'] = merged_C['tag_y'] / merged_C['tag_x']
merged_C['volumes_with_footnotes/all_volumes'] = merged_C['footnotes_present'] / merged_C['tag']
merged_C
| decade | tag | footnotes_present | volumes_with_footnotes/all_volumes | |
|---|---|---|---|---|
| 0 | 1690 | 1 | 0.0 | 0.000000 |
| 1 | 1700 | 20 | 2.0 | 0.100000 |
| 2 | 1710 | 36 | 10.0 | 0.277778 |
| 3 | 1720 | 54 | 13.0 | 0.240741 |
| 4 | 1730 | 44 | 21.0 | 0.477273 |
| 5 | 1740 | 75 | 18.0 | 0.240000 |
| 6 | 1750 | 121 | 31.0 | 0.256198 |
| 7 | 1760 | 184 | 53.0 | 0.288043 |
| 8 | 1770 | 236 | 63.0 | 0.266949 |
| 9 | 1780 | 255 | 53.0 | 0.207843 |
| 10 | 1790 | 396 | 94.0 | 0.237374 |
| 11 | 1800 | 62 | 15.0 | 0.241935 |
# Create a dataframe of all volumes (including those misclassified in one-footnote-dateset)
all_footnotes_by_date = one_footnote_tagged_new_df.groupby('Date')[['tag']].count().reset_index()
#contextual_footnotes_by_date = contextual_footnotes_df.groupby('Date')[['tag']].count().reset_index()
contextual_footnotes_by_date = contextual_footnotes_df.groupby('Date')[['footnotes_present']].sum().reset_index()
# Merge the two dataframes
merged_C_date = pd.merge_ordered(all_footnotes_by_date,contextual_footnotes_by_date, on='Date').fillna(0)
merged_C_date['volumes_with_footnotes/all_volumes'] = merged_C_date['footnotes_present'] / merged_C_date['tag']
merged_C_date
| Date | tag | footnotes_present | volumes_with_footnotes/all_volumes | |
|---|---|---|---|---|
| 0 | 1697 | 1 | 0.0 | 0.000000 |
| 1 | 1701 | 2 | 1.0 | 0.500000 |
| 2 | 1702 | 3 | 0.0 | 0.000000 |
| 3 | 1703 | 2 | 0.0 | 0.000000 |
| 4 | 1704 | 1 | 0.0 | 0.000000 |
| 5 | 1705 | 5 | 1.0 | 0.200000 |
| 6 | 1706 | 2 | 0.0 | 0.000000 |
| 7 | 1707 | 2 | 0.0 | 0.000000 |
| 8 | 1708 | 1 | 0.0 | 0.000000 |
| 9 | 1709 | 2 | 0.0 | 0.000000 |
| 10 | 1710 | 8 | 1.0 | 0.125000 |
| 11 | 1711 | 3 | 1.0 | 0.333333 |
| 12 | 1712 | 3 | 2.0 | 0.666667 |
| 13 | 1713 | 2 | 0.0 | 0.000000 |
| 14 | 1715 | 6 | 2.0 | 0.333333 |
| 15 | 1716 | 3 | 1.0 | 0.333333 |
| 16 | 1717 | 2 | 0.0 | 0.000000 |
| 17 | 1718 | 3 | 0.0 | 0.000000 |
| 18 | 1719 | 6 | 3.0 | 0.500000 |
| 19 | 1720 | 6 | 1.0 | 0.166667 |
| 20 | 1721 | 5 | 2.0 | 0.400000 |
| 21 | 1722 | 3 | 0.0 | 0.000000 |
| 22 | 1723 | 4 | 2.0 | 0.500000 |
| 23 | 1724 | 4 | 0.0 | 0.000000 |
| 24 | 1725 | 8 | 1.0 | 0.125000 |
| 25 | 1726 | 7 | 2.0 | 0.285714 |
| 26 | 1727 | 7 | 3.0 | 0.428571 |
| 27 | 1728 | 7 | 2.0 | 0.285714 |
| 28 | 1729 | 3 | 0.0 | 0.000000 |
| 29 | 1730 | 3 | 2.0 | 0.666667 |
| 30 | 1731 | 4 | 1.0 | 0.250000 |
| 31 | 1732 | 5 | 3.0 | 0.600000 |
| 32 | 1733 | 4 | 0.0 | 0.000000 |
| 33 | 1734 | 3 | 2.0 | 0.666667 |
| 34 | 1735 | 8 | 6.0 | 0.750000 |
| 35 | 1736 | 6 | 0.0 | 0.000000 |
| 36 | 1737 | 7 | 3.0 | 0.428571 |
| 37 | 1738 | 2 | 2.0 | 1.000000 |
| 38 | 1739 | 2 | 2.0 | 1.000000 |
| 39 | 1740 | 6 | 3.0 | 0.500000 |
| 40 | 1741 | 10 | 1.0 | 0.100000 |
| 41 | 1742 | 12 | 5.0 | 0.416667 |
| 42 | 1743 | 8 | 3.0 | 0.375000 |
| 43 | 1744 | 7 | 0.0 | 0.000000 |
| 44 | 1745 | 2 | 1.0 | 0.500000 |
| 45 | 1746 | 8 | 1.0 | 0.125000 |
| 46 | 1747 | 5 | 0.0 | 0.000000 |
| 47 | 1748 | 5 | 0.0 | 0.000000 |
| 48 | 1749 | 12 | 4.0 | 0.333333 |
| 49 | 1750 | 12 | 3.0 | 0.250000 |
| 50 | 1751 | 13 | 2.0 | 0.153846 |
| 51 | 1752 | 16 | 3.0 | 0.187500 |
| 52 | 1753 | 12 | 1.0 | 0.083333 |
| 53 | 1754 | 13 | 3.0 | 0.230769 |
| 54 | 1755 | 15 | 5.0 | 0.333333 |
| 55 | 1756 | 10 | 5.0 | 0.500000 |
| 56 | 1757 | 12 | 4.0 | 0.333333 |
| 57 | 1758 | 7 | 0.0 | 0.000000 |
| 58 | 1759 | 11 | 5.0 | 0.454545 |
| 59 | 1760 | 17 | 13.0 | 0.764706 |
| 60 | 1761 | 19 | 5.0 | 0.263158 |
| 61 | 1762 | 11 | 1.0 | 0.090909 |
| 62 | 1763 | 10 | 4.0 | 0.400000 |
| 63 | 1764 | 10 | 0.0 | 0.000000 |
| 64 | 1765 | 17 | 6.0 | 0.352941 |
| 65 | 1766 | 22 | 5.0 | 0.227273 |
| 66 | 1767 | 30 | 4.0 | 0.133333 |
| 67 | 1768 | 20 | 9.0 | 0.450000 |
| 68 | 1769 | 28 | 6.0 | 0.214286 |
| 69 | 1770 | 25 | 8.0 | 0.320000 |
| 70 | 1771 | 19 | 6.0 | 0.315789 |
| 71 | 1772 | 27 | 3.0 | 0.111111 |
| 72 | 1773 | 29 | 7.0 | 0.241379 |
| 73 | 1774 | 28 | 11.0 | 0.392857 |
| 74 | 1775 | 29 | 7.0 | 0.241379 |
| 75 | 1776 | 26 | 5.0 | 0.192308 |
| 76 | 1777 | 10 | 2.0 | 0.200000 |
| 77 | 1778 | 20 | 5.0 | 0.250000 |
| 78 | 1779 | 23 | 9.0 | 0.391304 |
| 79 | 1780 | 23 | 3.0 | 0.130435 |
| 80 | 1781 | 13 | 5.0 | 0.384615 |
| 81 | 1782 | 17 | 3.0 | 0.176471 |
| 82 | 1783 | 35 | 8.0 | 0.228571 |
| 83 | 1784 | 27 | 4.0 | 0.148148 |
| 84 | 1785 | 26 | 6.0 | 0.230769 |
| 85 | 1786 | 25 | 5.0 | 0.200000 |
| 86 | 1787 | 23 | 4.0 | 0.173913 |
| 87 | 1788 | 34 | 11.0 | 0.323529 |
| 88 | 1789 | 32 | 4.0 | 0.125000 |
| 89 | 1790 | 37 | 11.0 | 0.297297 |
| 90 | 1791 | 37 | 4.0 | 0.108108 |
| 91 | 1792 | 24 | 5.0 | 0.208333 |
| 92 | 1793 | 44 | 9.0 | 0.204545 |
| 93 | 1794 | 38 | 7.0 | 0.184211 |
| 94 | 1795 | 42 | 15.0 | 0.357143 |
| 95 | 1796 | 51 | 17.0 | 0.333333 |
| 96 | 1797 | 41 | 9.0 | 0.219512 |
| 97 | 1798 | 35 | 7.0 | 0.200000 |
| 98 | 1799 | 47 | 10.0 | 0.212766 |
| 99 | 1800 | 62 | 15.0 | 0.241935 |
# Let's perform Pearson's test for correlation between number of volumes and number of footnotes
# We assume the null hypthoesis: there is no association between the variables
rho, p = pearsonr(merged_C_date['tag'], merged_C_date['footnotes_present'])
#print Pearson rank correlation and p-value
print("CORRELATION BETWEEN NUMBER OF VOLUMES AND NUMBER OF CONTEXTUAL FOOTNOTES (by year)")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
CORRELATION BETWEEN NUMBER OF VOLUMES AND NUMBER OF CONTEXTUAL FOOTNOTES (by year) Pearson rank correlation: 0.8473261035742256 p-value: 1.0979638673677108e-28
# Let's perform Pearson's test for correlation between number of volumes and number of footnotes
# We assume the null hypthoesis: there is no association between the variables
rho, p = pearsonr(merged_C['tag'], merged_C['footnotes_present'])
#print Pearson rank correlation and p-value
print("CORRELATION BETWEEN NUMBER OF VOLUMES AND NUMBER OF CONTEXTUAL FOOTNOTES (by decade)")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
CORRELATION BETWEEN NUMBER OF VOLUMES AND NUMBER OF CONTEXTUAL FOOTNOTES (by decade) Pearson rank correlation: 0.9842877224397641 p-value: 7.345801582116839e-09
This indicates that there is a strong (statistically significant) positive correlation between the number of volumes and the number of footnotes.
The p-value of 5.087400861535177e-09 is lower than 0.05, indicating that the correlation is statistically significant. The correlation coefficient of 0.9854062253339878 indicates a strong positive correlation
CORRELATION BETWEEN NUMBER OF VOLUMES AND NUMBER OF CONTEXTUAL FOOTNOTES (by decade) Pearson rank correlation: 0.9854062253339878 p-value: 5.087400861535177e-09
The distribution of contextual footnotes is correlated, as we would expect.
# Figure out what proportion of the larger population our subset is
merged_C['tag'].sum() / merged_C['footnotes_present'].sum()
3.9785522788203753
# ALT Figure out what proportion of the larger population our subset is
merged_C['footnotes_present'].sum() / merged_C['tag'].sum()
0.25134770889487873
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_C['decade'], merged_C['volumes_with_footnotes/all_volumes'])
#print Pearson rank correlation and p-value
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Pearson rank correlation: 0.33495174672369576 p-value: 0.28721119933081807
Now that we've confimred the correlation between footnotes and volumes, we've normalized our footnotes by the number of volumes that year.
What we're testing now is whether there is a clear linear increase in the proportion of contextual footnotes over time:
The correlation coefficient is 0.33495174672369576 indicates that there is a weak positive correlation between the normalized number of footnotes and the decade.
However p-value of 0.28721119933081807 is higher than 0.05, indicating that the correlation is not necessarily statistically significant.
Here, the null test has failed to be rejected. This does not mean that contextual footnotes keep pace with the number of volumes per year. See below for our piecewise analysis of the trend.
result = stats.linregress(merged_C['decade'], merged_C['volumes_with_footnotes/all_volumes'])
print(result)
print(f"The slope of the line measuring proportion of footnotes/decade is:{result.slope}")
LinregressResult(slope=0.0010446378092182436, intercept=-1.5867150952357072, rvalue=0.33495174672369576, pvalue=0.2872111993308181, stderr=0.0009292720096870338, intercept_stderr=1.6218969274357433) The slope of the line measuring proportion of footnotes/decade is:0.0010446378092182436
#esult = stats.linregress(merged_C['tag_y'], merged_C['tag_x'])
#print(result)
# Let's run a chi2 goodness of fit test
#ratio_of_footnotes = merged_C['tag_y'].sum() / merged_C['tag_x'].sum()
#stats.chisquare(f_obs=merged_C['tag_y'], f_exp=(merged_C['tag_x']*(ratio_of_footnotes)))
fig, ax = plt.subplots(1,1, figsize = (8,6))
label = merged_C["decade"]
x = np.arange(len(label))
rect2 = ax.bar(x,
merged_C["tag"],
label = "All volumes",
edgecolor = "black")
rect2 = ax.bar(x,
merged_C["footnotes_present"],
label = "Contextual footnotes in volumes",
edgecolor = "black")
#add the labels to the axis
ax.set_ylabel("Number of volumes",
fontsize = 15,
labelpad = 20)
ax.set_xlabel("Decade",
fontsize = 15,
labelpad =20)
ax.set_title("Contextual footnotes in one-footnote dataset vs all volumes in one-footnote dataset, by decade",
fontsize = 15,
pad = 20)
#set the ticks
ax.set_xticks(x)
ax.set_xticklabels(label)#add the legend
#using the labels of the bars
ax.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax.tick_params(axis = "x",
which = "both",
labelrotation = 90)
ax.tick_params(axis = "y",
which = "both",
labelsize = 10)
fig, (ax1, ax2) = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]}, figsize = (8,8))
label = merged_C["decade"]
x = np.arange(len(label))
rect2 = ax1.bar(x,
merged_C["tag"],
label = "All volumes",
edgecolor = "black")
rect2 = ax1.bar(x,
merged_C["footnotes_present"],
#label = "Volumes with contextual footnotes",
label = "Contextual footnotes in volumes",
edgecolor = "black")
rect3 = ax2.plot(x,
merged_C["footnotes_present"]/merged_C["tag"],
label = "Proportion of Contextual Footnotes",
marker="o",
color="black",
markeredgecolor="black")
#add the labels to the axis
ax1.set_ylabel("Number of volumes",
fontsize = 15,
labelpad = 20)
ax2.set_xlabel("Decade",
fontsize = 15,
labelpad =20)
ax1.set_title("Contextual footnotes in one-footnote dataset vs all volumes in one-footnote dataset, by decade",
fontsize = 15,
pad = 10)
#set the ticks
ax1.set_xticks(x)
ax1.set_xticklabels(label)#add the legend
#using the labels of the bars
ax1.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax1.tick_params(axis = "x",
which = "both",
labelrotation = 0)
ax1.tick_params(axis = "y",
which = "both",
labelsize = 10)
#add the labels to the axis
#set the ticks
ax2.set_xticks(x)
ax2.set_xticklabels(label)#add the legend
#using the labels of the bars
ax2.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax2.tick_params(axis = "x",
which = "both",
labelrotation = 0)
# Creating a Linear Regression model on our data
#lin = LinearRegression()
#lin.fit(merged_C[['decade']], merged_C['tag_x'])
# Creating a plot
#ax = merged_C.plot.scatter(x='decade', y='tag_x', alpha=1)
#ax.plot(merged_C['decade'], lin.predict(merged_C[['decade']]), c='r')
#lin.coef_
# Creating a Linear Regression model on our data
#lin = LinearRegression()
#lin.fit(merged_C[['decade']], merged_C['tag_y'])
# Creating a plot
#ax = merged_C.plot.scatter(x='decade', y='tag_y', alpha=1)
#ax.plot(merged_C['decade'], lin.predict(merged_C[['decade']]), c='r')
#lin.coef_
#from sklearn.linear_model import LinearRegression, BayesianRidge
# Creating a Linear Regression model on our data
#lin = BayesianRidge()
#lin.fit(merged_C_date[['Date']], merged_C_date['tag_x'])
# Creating a plot
#ax = merged_C_date.plot.scatter(x='Date', y='tag_x', alpha=1)
#ax.plot(merged_C_date['Date'], lin.predict(merged_C_date[['Date']]), c='r')
#lin.score(merged_C_date[['Date']], merged_C_date['tag_x'])
#print(lin.coef_)
#model = BayesianRidge()
#model.fit(merged_C_date[['Date']], merged_C_date['tag_y'])
# Creating a plot
#ax = merged_C_date.plot.scatter(x='Date', y='tag_y', alpha=1)
#ax.plot(merged_C_date['Date'], model.predict(merged_C_date[['Date']]), c='r')
#model.score(merged_C_date[['Date']], merged_C_date['tag_x'])
#print(model.coef_)
#model = BayesianRidge()
#model.fit(merged_C_date[['Date']], merged_C_date['volumes_with_footnotes/all_volumes'])
# Creating a plot
#ax = merged_C_date.plot.scatter(x='Date', y='volumes_with_footnotes/all_volumes', alpha=1)
#ax.plot(merged_C_date['Date'], model.predict(merged_C_date[['Date']]), c='r')
#model.score(merged_C_date[['Date']], merged_C_date['volumes_with_footnotes/all_volumes'])
#print(model.coef_)
# Fitting a Linear Regression model to our data (binning by year, rather than decade)
#lin = LinearRegression()
#lin.fit(merged_C_date[['Date']], merged_C_date['volumes_with_footnotes/all_volumes'])
# Creating a scatterplot of our data (in blue)
#ax = merged_C_date.plot.scatter(x='Date', y='volumes_with_footnotes/all_volumes')
# Adding our plotted linear regression model (in orange)
#ax.plot(merged_C_date['Date'], lin.predict(merged_C_date[['Date']]), c='orange')
lin.score(merged_C_date[['Date']], merged_C_date['volumes_with_footnotes/all_volumes'])
-5.9937692597378245e-05
# Fitting a Linear Regression model to our data (binning by decade)
linear = LinearRegression()
linear.fit(merged_C[['decade']], merged_C['volumes_with_footnotes/all_volumes'])
# Creating a scatterplot of our data (in blue)
ax = merged_C.plot.scatter(x='decade', y='volumes_with_footnotes/all_volumes')
# Adding our plotted linear regression model (in orange)
ax.plot(merged_C['decade'], linear.predict(merged_C[['decade']]), c='orange')
linear.score(merged_C[['decade']], merged_C['volumes_with_footnotes/all_volumes'])
0.11219267263325494
xtrain = merged_C['decade']
ytrain = merged_C['volumes_with_footnotes/all_volumes']
model = sm.Logit(ytrain, xtrain).fit()
model.summary()
Optimization terminated successfully.
Current function value: 0.438753
Iterations 4
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:2383: RuntimeWarning: overflow encountered in exp
return 1/(1+np.exp(-X))
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:595: HessianInversionWarning: Inverting hessian failed, no bse or cov_params available
warnings.warn('Inverting hessian failed, no bse or cov_params '
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:2383: RuntimeWarning: overflow encountered in exp
return 1/(1+np.exp(-X))
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/statsmodels/base/model.py:595: HessianInversionWarning: Inverting hessian failed, no bse or cov_params available
warnings.warn('Inverting hessian failed, no bse or cov_params '
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:4465: RuntimeWarning: divide by zero encountered in scalar divide
return 1 - self.llf/self.llnull
| Dep. Variable: | volumes_with_footnotes/all_volumes | No. Observations: | 12 |
|---|---|---|---|
| Model: | Logit | Df Residuals: | 11 |
| Method: | MLE | Df Model: | 0 |
| Date: | Tue, 18 Mar 2025 | Pseudo R-squ.: | inf |
| Time: | 16:19:58 | Log-Likelihood: | -5.2650 |
| converged: | True | LL-Null: | 0.0000 |
| Covariance Type: | nonrobust | LLR p-value: | nan |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| decade | -0.0007 | 0.000 | -1.723 | 0.085 | -0.001 | 9.24e-05 |
#!pip install Rbeast
Here, again, we have a non-robust model and a pvalue greater than 0.05.
#o = rb.beast(merged_C['volumes_with_footnotes/all_volumes'], start=1690, deltat= 10, season='none') # season='none' bcz the data has no seasonal/periodic component
#rb.plot(o, title='Proportion of Contextual Footnotes per Decade')
#rb.print(o)
#o = rb.beast( merged_C_date['volumes_with_footnotes/all_volumes'], start=1690, season='none', period=1.0) # season='none' bcz the data has no seasonal/periodic component
#rb.plot(o, title='Proportion of contextual footnotes per year')
#rb.print(o)
x= list(merged_C['decade'])
y= list(merged_C['volumes_with_footnotes/all_volumes'])
pw_fit = piecewise_regression.Fit(x, y, n_breakpoints=1)
pw_fit.summary()
Breakpoint Regression Results
====================================================================================================
No. Observations 12
No. Model Parameters 4
Degrees of Freedom 8
Res. Sum of Squares 0.0388305
Total Sum of Squares 0.139092
R Squared 0.720829
Adjusted R Squared 0.561303
Converged: True
====================================================================================================
====================================================================================================
Estimate Std Err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------------------------
const -15.1904 5.31 -2.8594 0.0212 -27.441 -2.94
alpha1 0.009 0.00312 2.8886 0.0202 0.0018152 0.016185
beta1 -0.0111746 0.0033 -3.3904 - -0.018775 -0.0035741
breakpoint1 1727.62 8.2 - - 1708.7 1746.5
----------------------------------------------------------------------------------------------------
These alphas(gradients of segments) are estimatedfrom betas(change in gradient)
----------------------------------------------------------------------------------------------------
alpha2 -0.00217459 0.00108 -2.0228 0.0777 -0.0046536 0.00030442
====================================================================================================
Davies test for existence of at least 1 breakpoint: p=3.32561e-10 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)
'\n Breakpoint Regression Results \n====================================================================================================\nNo. Observations 12\nNo. Model Parameters 4\nDegrees of Freedom 8\nRes. Sum of Squares 0.0388305\nTotal Sum of Squares 0.139092\nR Squared 0.720829\nAdjusted R Squared 0.561303\nConverged: True\n====================================================================================================\n====================================================================================================\n Estimate Std Err t P>|t| [0.025 0.975]\n----------------------------------------------------------------------------------------------------\nconst -15.1904 5.31 -2.8594 0.0212 -27.441 -2.94\nalpha1 0.009 0.00312 2.8886 0.0202 0.0018152 0.016185\nbeta1 -0.0111746 0.0033 -3.3904 - -0.018775 -0.0035741\nbreakpoint1 1727.62 8.2 - - 1708.7 1746.5\n----------------------------------------------------------------------------------------------------\nThese alphas(gradients of segments) are estimatedfrom betas(change in gradient)\n----------------------------------------------------------------------------------------------------\nalpha2 -0.00217459 0.00108 -2.0228 0.0777 -0.0046536 0.00030442\n====================================================================================================\nDavies test for existence of at least 1 breakpoint: p=3.32561e-10 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)\n\n'
# Plot the data, fit, breakpoints and confidence intervals
plt.subplots(figsize=(6, 2))
pw_fit.plot_data(color="grey", s=20)
# Pass in standard matplotlib keywords to control any of the plots
pw_fit.plot_fit(color="red", linestyle='dashed', linewidth=2)
pw_fit.plot_breakpoints()
pw_fit.plot_breakpoint_confidence_intervals()
plt.xlabel("Decade")
plt.ylabel("Proportion")
plt.title("Proportion of Contextual Footnotes in One-Footnote Corpus (trend fitted)")
plt.show()
plt.close()
pw_results = pw_fit.get_results()
pw_estimates = pw_results["estimates"]
pw_results
{'davies': 3.3256113579359773e-10,
'estimates': {'const': {'estimate': -15.19037037037056,
'se': 5.312394529946326,
'confidence_interval': (-27.440774123355446, -2.9399666173856733),
't_stat': -2.859420603033419,
'p_t': 0.021170250807809463},
'beta1': {'estimate': -0.011174585681214082,
'se': 0.0032959520455812935,
'confidence_interval': (-0.018775064727196242, -0.003574106635231924),
't_stat': -3.3903969252814985,
'p_t': '-'},
'breakpoint1': {'estimate': 1727.6225406843112,
'se': 8.203198611200033,
'confidence_interval': (1708.705930766384, 1746.5391506022384),
't_stat': '-',
'p_t': '-'},
'alpha1': {'estimate': 0.009000000000000114,
'se': 0.0031157069319901016,
'confidence_interval': (0.0018151669312788032, 0.016184833068721424),
't_stat': 2.888590036371465,
'p_t': 0.02024290034426371},
'alpha2': {'estimate': -0.0021745856812139686,
'se': 0.0010750210233852848,
'confidence_interval': (-0.004653588606388241, 0.00030441724396030375),
't_stat': -2.022830841359837,
'p_t': 0.07771667784636126}},
'bic': -58.861838509838925,
'rss': 0.038830518744203256,
'converged': True}
print('Number of volumes with contextual footnotes in our one-footnote dataset:')
contextual_footnotes_df['Title'].count()
Number of volumes with contextual footnotes in our one-footnote dataset:
367
print('Titles of volumes with contextual footnotes (in our one-footnote dataset):')
contextual_footnotes_df[['Title','Vol_Number']]
Titles of volumes with contextual footnotes (in our one-footnote dataset):
| Title | Vol_Number | |
|---|---|---|
| 0 | Prince Arthur: an allegorical romance. The sto... | Volume 1 |
| 10 | Lettres de Sophie et du Chevalier de **, pour ... | Volume 2 |
| 12 | Female banishment: or, the woman hater. Origin... | Volume 1 |
| 13 | Female banishment: or, the woman hater. Origin... | Volume 2 |
| 14 | The illustrious French lovers; being the true ... | Volume 1 |
| ... | ... | ... |
| 1463 | Short rules for attaining the Italian language... | 0 |
| 1468 | The vicar of Wakefield, a tale supposed to be ... | 0 |
| 1471 | The expedition of Humphry Clinker. By Tobias S... | Volume 1 |
| 1472 | Beauties in prose and verse: or, the new, plea... | 0 |
| 1475 | Ben Johnson's jests: or the wit's pocket compa... | 0 |
367 rows × 2 columns
contextual_footnotes_df['Filename'].to_csv('../contextual_footnotes_filenames.csv')
genre_term = "memoir|Memoir"
dataframe = contextual_footnotes_df
corpus_name = "Contextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_contextual_memoirs = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'memoir|Memoir': 12 Percent of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'memoir|Memoir': 0.0326975476839237
genre_term = "letter|Letter|lettre|Lettre"
dataframe = contextual_footnotes_df
corpus_name = "Contextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_contextual_letters = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'letter|Letter|lettre|Lettre': 34 Percent of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'letter|Letter|lettre|Lettre': 0.09264305177111716
genre_term = "novel|Novel|roman|Roman"
dataframe = contextual_footnotes_df
corpus_name = "Contextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'novel|Novel|roman|Roman': 36 Percent of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'novel|Novel|roman|Roman': 0.09809264305177112
genre_term = "tale|Tale|recit|Recit"
dataframe = contextual_footnotes_df
corpus_name = "Contextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_contextual_tale = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'tale|Tale|recit|Recit': 36 Percent of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'tale|Tale|recit|Recit': 0.09809264305177112
genre_term = "romance|Romance"
dataframe = contextual_footnotes_df
corpus_name = "Contextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'romance|Romance': 9 Percent of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'romance|Romance': 0.02452316076294278
genre_term = "history|History|histories|Histories|histoire|Histoire"
dataframe = contextual_footnotes_df
corpus_name = "Contextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'history|History|histories|Histories|histoire|Histoire': 78 Percent of volumes in the Contextual Footnotes (One-Footnote Corpus) that contain the words 'history|History|histories|Histories|histoire|Histoire': 0.2125340599455041
# Proportions of Memoirs, Letters, Tales
proportions_contextual = [proportion_contextual_memoirs, proportion_contextual_letters, proportion_contextual_tale]
proportions_contextual
[0.0326975476839237, 0.09264305177111716, 0.09809264305177112]
# Let's look at the 'T' tag
translation_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'T']
# Plot the nubmer of T tagged footnotes in each year
ax = translation_footnotes_df.groupby('Date')[['footnotes_present']].sum().plot(kind="bar", figsize=(15, 5), title="Translation footnotes in our one-footnote dataset, by year")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Date")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'T'"])
<matplotlib.legend.Legend at 0x1480d1290>
# Plot the nubmer of T tagged footnotes in each decade
ax = translation_footnotes_df.groupby('decade')[['footnotes_present']].sum().plot(kind="bar", figsize=(10, 5), title="Translation footnotes in our one-footnote dataset, by decade")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'T'"])
<matplotlib.legend.Legend at 0x148135e90>
# Redo chart so that the axis range matches contextual footnotes
ax = translation_footnotes_df.groupby('decade')[['footnotes_present']].sum().plot(kind="bar", figsize=(10, 5), title="Translation footnotes in our one-footnote dataset, by decade")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
ax.set_ylim(0, 100)
plt.legend(["Volumes with tag 'T'"])
<matplotlib.legend.Legend at 0x1482477d0>
# Create a dataframe of all volumes (including those misclassified in one-footnote-dataset)
all_footnotes_by_decade = one_footnote_tagged_new_df.groupby('decade')[['tag']].count().reset_index()
#not_translation_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] != 'T']
#not_translation_footnotes_by_decade = not_translation_footnotes_df.groupby('decade')[['tag']].count().reset_index()
translation_footnotes_by_decade = translation_footnotes_df.groupby('decade')[['footnotes_present']].sum().reset_index()
# Merge the two dataframes
merged_T = pd.merge_ordered(all_footnotes_by_decade,translation_footnotes_by_decade, on='decade').fillna(0)
merged_T['ratio'] = merged_T['footnotes_present'] / merged_T['tag']
merged_T['volumes_with_footnotes/all_volumes'] = merged_T['footnotes_present'] / merged_T['tag']
merged_T
| decade | tag | footnotes_present | ratio | volumes_with_footnotes/all_volumes | |
|---|---|---|---|---|---|
| 0 | 1690 | 1 | 0.0 | 0.000000 | 0.000000 |
| 1 | 1700 | 20 | 2.0 | 0.100000 | 0.100000 |
| 2 | 1710 | 36 | 2.0 | 0.055556 | 0.055556 |
| 3 | 1720 | 54 | 7.0 | 0.129630 | 0.129630 |
| 4 | 1730 | 44 | 6.0 | 0.136364 | 0.136364 |
| 5 | 1740 | 75 | 4.0 | 0.053333 | 0.053333 |
| 6 | 1750 | 121 | 8.0 | 0.066116 | 0.066116 |
| 7 | 1760 | 184 | 16.0 | 0.086957 | 0.086957 |
| 8 | 1770 | 236 | 20.0 | 0.084746 | 0.084746 |
| 9 | 1780 | 255 | 15.0 | 0.058824 | 0.058824 |
| 10 | 1790 | 396 | 28.0 | 0.070707 | 0.070707 |
| 11 | 1800 | 62 | 1.0 | 0.016129 | 0.016129 |
# Create a dataframe of all volumes (including those misclassified in one-footnote-dateset)
all_footnotes_by_date = one_footnote_tagged_new_df.groupby('Date')[['tag']].count().reset_index()
translation_footnotes_by_date = translation_footnotes_df.groupby('Date')[['footnotes_present']].sum().reset_index()
# Merge the two dataframes
merged_T_date = pd.merge_ordered(all_footnotes_by_date,translation_footnotes_by_date, on='Date').fillna(0)
merged_T_date['volumes_with_footnotes/all_volumes'] = merged_T_date['footnotes_present'] / merged_T_date['tag']
merged_T_date
| Date | tag | footnotes_present | volumes_with_footnotes/all_volumes | |
|---|---|---|---|---|
| 0 | 1697 | 1 | 0.0 | 0.000000 |
| 1 | 1701 | 2 | 1.0 | 0.500000 |
| 2 | 1702 | 3 | 0.0 | 0.000000 |
| 3 | 1703 | 2 | 0.0 | 0.000000 |
| 4 | 1704 | 1 | 0.0 | 0.000000 |
| 5 | 1705 | 5 | 1.0 | 0.200000 |
| 6 | 1706 | 2 | 0.0 | 0.000000 |
| 7 | 1707 | 2 | 0.0 | 0.000000 |
| 8 | 1708 | 1 | 0.0 | 0.000000 |
| 9 | 1709 | 2 | 0.0 | 0.000000 |
| 10 | 1710 | 8 | 1.0 | 0.125000 |
| 11 | 1711 | 3 | 0.0 | 0.000000 |
| 12 | 1712 | 3 | 1.0 | 0.333333 |
| 13 | 1713 | 2 | 0.0 | 0.000000 |
| 14 | 1715 | 6 | 0.0 | 0.000000 |
| 15 | 1716 | 3 | 0.0 | 0.000000 |
| 16 | 1717 | 2 | 0.0 | 0.000000 |
| 17 | 1718 | 3 | 0.0 | 0.000000 |
| 18 | 1719 | 6 | 0.0 | 0.000000 |
| 19 | 1720 | 6 | 1.0 | 0.166667 |
| 20 | 1721 | 5 | 1.0 | 0.200000 |
| 21 | 1722 | 3 | 0.0 | 0.000000 |
| 22 | 1723 | 4 | 0.0 | 0.000000 |
| 23 | 1724 | 4 | 0.0 | 0.000000 |
| 24 | 1725 | 8 | 1.0 | 0.125000 |
| 25 | 1726 | 7 | 1.0 | 0.142857 |
| 26 | 1727 | 7 | 2.0 | 0.285714 |
| 27 | 1728 | 7 | 1.0 | 0.142857 |
| 28 | 1729 | 3 | 0.0 | 0.000000 |
| 29 | 1730 | 3 | 1.0 | 0.333333 |
| 30 | 1731 | 4 | 0.0 | 0.000000 |
| 31 | 1732 | 5 | 1.0 | 0.200000 |
| 32 | 1733 | 4 | 0.0 | 0.000000 |
| 33 | 1734 | 3 | 1.0 | 0.333333 |
| 34 | 1735 | 8 | 1.0 | 0.125000 |
| 35 | 1736 | 6 | 0.0 | 0.000000 |
| 36 | 1737 | 7 | 2.0 | 0.285714 |
| 37 | 1738 | 2 | 0.0 | 0.000000 |
| 38 | 1739 | 2 | 0.0 | 0.000000 |
| 39 | 1740 | 6 | 0.0 | 0.000000 |
| 40 | 1741 | 10 | 0.0 | 0.000000 |
| 41 | 1742 | 12 | 1.0 | 0.083333 |
| 42 | 1743 | 8 | 0.0 | 0.000000 |
| 43 | 1744 | 7 | 0.0 | 0.000000 |
| 44 | 1745 | 2 | 0.0 | 0.000000 |
| 45 | 1746 | 8 | 1.0 | 0.125000 |
| 46 | 1747 | 5 | 0.0 | 0.000000 |
| 47 | 1748 | 5 | 1.0 | 0.200000 |
| 48 | 1749 | 12 | 1.0 | 0.083333 |
| 49 | 1750 | 12 | 1.0 | 0.083333 |
| 50 | 1751 | 13 | 1.0 | 0.076923 |
| 51 | 1752 | 16 | 0.0 | 0.000000 |
| 52 | 1753 | 12 | 0.0 | 0.000000 |
| 53 | 1754 | 13 | 2.0 | 0.153846 |
| 54 | 1755 | 15 | 1.0 | 0.066667 |
| 55 | 1756 | 10 | 1.0 | 0.100000 |
| 56 | 1757 | 12 | 0.0 | 0.000000 |
| 57 | 1758 | 7 | 0.0 | 0.000000 |
| 58 | 1759 | 11 | 2.0 | 0.181818 |
| 59 | 1760 | 17 | 2.0 | 0.117647 |
| 60 | 1761 | 19 | 2.0 | 0.105263 |
| 61 | 1762 | 11 | 2.0 | 0.181818 |
| 62 | 1763 | 10 | 1.0 | 0.100000 |
| 63 | 1764 | 10 | 1.0 | 0.100000 |
| 64 | 1765 | 17 | 2.0 | 0.117647 |
| 65 | 1766 | 22 | 2.0 | 0.090909 |
| 66 | 1767 | 30 | 0.0 | 0.000000 |
| 67 | 1768 | 20 | 3.0 | 0.150000 |
| 68 | 1769 | 28 | 1.0 | 0.035714 |
| 69 | 1770 | 25 | 2.0 | 0.080000 |
| 70 | 1771 | 19 | 0.0 | 0.000000 |
| 71 | 1772 | 27 | 2.0 | 0.074074 |
| 72 | 1773 | 29 | 6.0 | 0.206897 |
| 73 | 1774 | 28 | 3.0 | 0.107143 |
| 74 | 1775 | 29 | 1.0 | 0.034483 |
| 75 | 1776 | 26 | 3.0 | 0.115385 |
| 76 | 1777 | 10 | 1.0 | 0.100000 |
| 77 | 1778 | 20 | 2.0 | 0.100000 |
| 78 | 1779 | 23 | 0.0 | 0.000000 |
| 79 | 1780 | 23 | 2.0 | 0.086957 |
| 80 | 1781 | 13 | 3.0 | 0.230769 |
| 81 | 1782 | 17 | 0.0 | 0.000000 |
| 82 | 1783 | 35 | 1.0 | 0.028571 |
| 83 | 1784 | 27 | 1.0 | 0.037037 |
| 84 | 1785 | 26 | 0.0 | 0.000000 |
| 85 | 1786 | 25 | 4.0 | 0.160000 |
| 86 | 1787 | 23 | 0.0 | 0.000000 |
| 87 | 1788 | 34 | 3.0 | 0.088235 |
| 88 | 1789 | 32 | 1.0 | 0.031250 |
| 89 | 1790 | 37 | 3.0 | 0.081081 |
| 90 | 1791 | 37 | 2.0 | 0.054054 |
| 91 | 1792 | 24 | 2.0 | 0.083333 |
| 92 | 1793 | 44 | 4.0 | 0.090909 |
| 93 | 1794 | 38 | 3.0 | 0.078947 |
| 94 | 1795 | 42 | 5.0 | 0.119048 |
| 95 | 1796 | 51 | 2.0 | 0.039216 |
| 96 | 1797 | 41 | 2.0 | 0.048780 |
| 97 | 1798 | 35 | 2.0 | 0.057143 |
| 98 | 1799 | 47 | 3.0 | 0.063830 |
| 99 | 1800 | 62 | 1.0 | 0.016129 |
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_T_date['tag'], merged_T_date['footnotes_present'])
#print Pearson rank correlation and p-value
print("Correlation between NUMBER OF VOLUMES and NUMBER OF TRANSLATION FOOTNOTES (by year)")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between NUMBER OF VOLUMES and NUMBER OF TRANSLATION FOOTNOTES (by year) Pearson rank correlation: 0.6097139269256125 p-value: 1.665292921087675e-11
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_T['tag'], merged_T['footnotes_present'])
#print Pearson rank correlation and p-value
print("Correlation between NUMBER OF VOLUMES and NUMBER OF TRANSLATION FOOTNOTES (by decade)")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between NUMBER OF VOLUMES and NUMBER OF TRANSLATION FOOTNOTES (by decade) Pearson rank correlation: 0.9638388366135474 p-value: 4.582542266825464e-07
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_T['decade'], merged_T['volumes_with_footnotes/all_volumes'])
#print Pearson rank correlation and p-value
print("Correlation between DECADE and PROPORTION OF TRANSLATION FOOTNOTES")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between DECADE and PROPORTION OF TRANSLATION FOOTNOTES Pearson rank correlation: -0.13275656093795804 p-value: 0.6808509219004162
While the Pearson rank correlation of -0.13275656093795804 indicates a very, very weak negative correlation, the p-value of 0.6808509219004162 is not <0.05, indicating that this is not a statistically significant correlation.
Here, the null test has failed to be rejected. This does not mean that translation footnotes keep pace with the number of volumes per year. See below for our piecewise analysis of the trend.
xtrain = merged_T['decade']
ytrain = merged_T['volumes_with_footnotes/all_volumes']
model = sm.Logit(ytrain, xtrain).fit()
model.summary()
Optimization terminated successfully.
Current function value: 0.107157
Iterations 6
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:4465: RuntimeWarning: divide by zero encountered in scalar divide return 1 - self.llf/self.llnull
| Dep. Variable: | volumes_with_footnotes/all_volumes | No. Observations: | 12 |
|---|---|---|---|
| Model: | Logit | Df Residuals: | 11 |
| Method: | MLE | Df Model: | 0 |
| Date: | Tue, 18 Mar 2025 | Pseudo R-squ.: | inf |
| Time: | 16:42:33 | Log-Likelihood: | -1.2859 |
| converged: | True | LL-Null: | 0.0000 |
| Covariance Type: | nonrobust | LLR p-value: | nan |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| decade | -0.0015 | 0.001 | -2.288 | 0.022 | -0.003 | -0.000 |
# Figure out what proportion of the larger population our subset is
proportion = merged_T['tag'].sum() / merged_T['footnotes_present'].sum()
proportion
13.614678899082568
# Let's run a chi2 goodness of fit test
#stats.chisquare(f_obs=merged_T['tag_y'], f_exp=merged_T['tag_x']/proportion)
fig, ax = plt.subplots(1,1, figsize = (8,6))
label = merged_T["decade"]
x = np.arange(len(label))
rect2 = ax.bar(x,
merged_T["tag"],
label = "All volumes",
edgecolor = "black")
rect2 = ax.bar(x,
merged_T["footnotes_present"],
label = "Translation footnotes in volumes",
edgecolor = "black")
#add the labels to the axis
ax.set_ylabel("Number of volumes",
fontsize = 15,
labelpad = 20)
ax.set_xlabel("Decade",
fontsize = 15,
labelpad =20)
ax.set_title("Translation footnotes in one-footnote dataset vs all volumes in one-footnote dataset, by decade",
fontsize = 15,
pad = 20)
#set the ticks
ax.set_xticks(x)
ax.set_xticklabels(label)#add the legend
#using the labels of the bars
ax.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax.tick_params(axis = "x",
which = "both",
labelrotation = 90)
ax.tick_params(axis = "y",
which = "both",
labelsize = 10)
fig, (ax1, ax2) = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]}, figsize = (8,8))
label = merged_T["decade"]
x = np.arange(len(label))
rect2 = ax1.bar(x,
merged_T["tag"],
label = "All volumes",
edgecolor = "black")
rect2 = ax1.bar(x,
merged_T["footnotes_present"],
label = "Translation footnotes in volumes",
edgecolor = "black")
rect3 = ax2.plot(x,
merged_T["footnotes_present"]/merged_T["tag"],
label = "Proportion of Translation Footnotes",
marker="o",
color="black",
markeredgecolor="black")
#add the labels to the axis
ax1.set_ylabel("Number of volumes",
fontsize = 15,
labelpad = 20)
ax2.set_xlabel("Decade",
fontsize = 15,
labelpad =20)
ax1.set_title("Translation footnotes in one-footnote dataset vs all volumes in one-footnote dataset, by decade",
fontsize = 15,
pad = 10)
#set the ticks
ax1.set_xticks(x)
ax1.set_xticklabels(label)#add the legend
#using the labels of the bars
ax1.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax1.tick_params(axis = "x",
which = "both",
labelrotation = 0)
ax1.tick_params(axis = "y",
which = "both",
labelsize = 10)
###
#add the labels to the axis
#set the ticks
ax2.set_xticks(x)
ax2.set_xticklabels(label)#add the legend
ax2.set_ylim(0, 0.4)
#using the labels of the bars
ax2.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax2.tick_params(axis = "x",
which = "both",
labelrotation = 0)
# Fitting a Linear Regression model to our data (binning by decade)
linear = LinearRegression()
linear.fit(merged_T[['decade']], merged_T['volumes_with_footnotes/all_volumes'])
# Creating a scatterplot of our data (in blue)
ax = merged_T.plot.scatter(x='decade', y='volumes_with_footnotes/all_volumes')
# Adding our plotted linear regression model (in orange)
ax.plot(merged_T['decade'], linear.predict(merged_T[['decade']]), c='orange')
linear.score(merged_T[['decade']], merged_T['volumes_with_footnotes/all_volumes'])
0.01762430447207375
#define response variable
y = merged_T['volumes_with_footnotes/all_volumes']
#define predictor variables
x = merged_T[['decade']]
#add constant to predictor variables
x = sm.add_constant(x)
#fit linear regression model
model = sm.OLS(y, x).fit()
#view model summary
print(model.summary())
OLS Regression Results
==============================================================================================
Dep. Variable: volumes_with_footnotes/all_volumes R-squared: 0.018
Model: OLS Adj. R-squared: -0.081
Method: Least Squares F-statistic: 0.1794
Date: Tue, 18 Mar 2025 Prob (F-statistic): 0.681
Time: 16:47:06 Log-Likelihood: 22.172
No. Observations: 12 AIC: -40.34
Df Residuals: 10 BIC: -39.37
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.3297 0.610 0.541 0.600 -1.029 1.688
decade -0.0001 0.000 -0.424 0.681 -0.001 0.001
==============================================================================
Omnibus: 0.579 Durbin-Watson: 1.654
Prob(Omnibus): 0.749 Jarque-Bera (JB): 0.210
Skew: -0.301 Prob(JB): 0.900
Kurtosis: 2.763 Cond. No. 8.82e+04
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.82e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/scipy/stats/_stats_py.py:1736: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=12
warnings.warn("kurtosistest only valid for n>=20 ... continuing "
#define response variable
y = merged_T_date['volumes_with_footnotes/all_volumes']
#define predictor variables
x = merged_T_date[['Date']]
#add constant to predictor variables
x = sm.add_constant(x)
#fit linear regression model
model = sm.OLS(y, x).fit()
#view model summary
print(model.summary())
OLS Regression Results
==============================================================================================
Dep. Variable: volumes_with_footnotes/all_volumes R-squared: 0.002
Model: OLS Adj. R-squared: -0.008
Method: Least Squares F-statistic: 0.1830
Date: Tue, 18 Mar 2025 Prob (F-statistic): 0.670
Time: 16:47:15 Log-Likelihood: 93.976
No. Observations: 100 AIC: -184.0
Df Residuals: 98 BIC: -178.7
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const 0.3242 0.574 0.565 0.573 -0.815 1.463
Date -0.0001 0.000 -0.428 0.670 -0.001 0.001
==============================================================================
Omnibus: 39.322 Durbin-Watson: 2.329
Prob(Omnibus): 0.000 Jarque-Bera (JB): 81.148
Skew: 1.564 Prob(JB): 2.39e-18
Kurtosis: 6.112 Cond. No. 1.05e+05
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.05e+05. This might indicate that there are
strong multicollinearity or other numerical problems.
#o = rb.beast( merged_T['volumes_with_footnotes/all_volumes'], start=1690, deltat= 10, season='none') # season='none' bcz the data has no seasonal/periodic component
#rb.plot(o, title='Proportion of translation footnotes per decade')
#rb.print(o)
#o = rb.beast( merged_T_date['volumes_with_footnotes/all_volumes'], start=1690, season='none', period=1.0) # season='none' bcz the data has no seasonal/periodic component
#rb.plot(o, title='Proportion of translation footnotes per year')
#rb.print(o)
x= list(merged_T['decade'])
y= list(merged_T['volumes_with_footnotes/all_volumes'])
pw_fit = piecewise_regression.Fit(x, y, n_breakpoints=1)
pw_fit.summary()
Breakpoint Regression Results
====================================================================================================
No. Observations 12
No. Model Parameters 4
Degrees of Freedom 8
Res. Sum of Squares 0.00837614
Total Sum of Squares 0.0177657
R Squared 0.528522
Adjusted R Squared 0.259106
Converged: True
====================================================================================================
====================================================================================================
Estimate Std Err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------------------------
const -16.9 7.76 -2.1788 0.061 -34.786 0.98643
alpha1 0.01 0.00458 2.1853 0.0604 -0.00055242 0.020552
beta1 -0.0106303 0.00459 -2.316 - -0.021215 -4.5908e-05
breakpoint1 1700.99 3.93 - - 1691.9 1710.1
----------------------------------------------------------------------------------------------------
These alphas(gradients of segments) are estimatedfrom betas(change in gradient)
----------------------------------------------------------------------------------------------------
alpha2 -0.000630254 0.000356 -1.7692 0.115 -0.0014518 0.00019125
====================================================================================================
Davies test for existence of at least 1 breakpoint: p=0.00931248 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)
'\n Breakpoint Regression Results \n====================================================================================================\nNo. Observations 12\nNo. Model Parameters 4\nDegrees of Freedom 8\nRes. Sum of Squares 0.00837614\nTotal Sum of Squares 0.0177657\nR Squared 0.528522\nAdjusted R Squared 0.259106\nConverged: True\n====================================================================================================\n====================================================================================================\n Estimate Std Err t P>|t| [0.025 0.975]\n----------------------------------------------------------------------------------------------------\nconst -16.9 7.76 -2.1788 0.061 -34.786 0.98643\nalpha1 0.01 0.00458 2.1853 0.0604 -0.00055242 0.020552\nbeta1 -0.0106303 0.00459 -2.316 - -0.021215 -4.5908e-05\nbreakpoint1 1700.99 3.93 - - 1691.9 1710.1\n----------------------------------------------------------------------------------------------------\nThese alphas(gradients of segments) are estimatedfrom betas(change in gradient)\n----------------------------------------------------------------------------------------------------\nalpha2 -0.000630254 0.000356 -1.7692 0.115 -0.0014518 0.00019125\n====================================================================================================\nDavies test for existence of at least 1 breakpoint: p=0.00931248 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)\n\n'
# Plot the data, fit, breakpoints and confidence intervals
plt.subplots(figsize=(6, 2))
pw_fit.plot_data(color="grey", s=20)
# Pass in standard matplotlib keywords to control any of the plots
pw_fit.plot_fit(color="red", linestyle='dashed', linewidth=2)
pw_fit.plot_breakpoints()
pw_fit.plot_breakpoint_confidence_intervals()
plt.xlabel("Decade")
plt.ylabel("Proportion")
plt.ylim(0, 0.4)
plt.title("Proportion of Translation Footnotes in One-Footnote Corpus (trend fitted)")
plt.show()
plt.close()
print('Number of volumes with translation footnotes in our one-footnote dataset:')
translation_footnotes_df['Title'].count()
Number of volumes with translation footnotes in our one-footnote dataset:
109
print('Titles of volumes with translation footnotes (in our one-footnote dataset):')
translation_footnotes_df[['Title','Vol_Number']]
Titles of volumes with translation footnotes (in our one-footnote dataset):
| Title | Vol_Number | |
|---|---|---|
| 8 | Galatea a pastoral romance; imitated from Cerv... | 0 |
| 10 | Lettres de Sophie et du Chevalier de **, pour ... | Volume 2 |
| 11 | Les delices du sentiment; or the passionate lo... | 0 |
| 12 | Female banishment: or, the woman hater. Origin... | Volume 1 |
| 14 | The illustrious French lovers; being the true ... | Volume 1 |
| ... | ... | ... |
| 1214 | Arabian nights entertainments: consisting of o... | Volume 4 |
| 1241 | The tusculan disputations of Marcus Tullius Ci... | 0 |
| 1318 | The indian cottage. By James Henry Bernardin d... | 0 |
| 1336 | A sentimental journey through France and Italy... | Volume 2 |
| 1468 | The vicar of Wakefield, a tale supposed to be ... | 0 |
109 rows × 2 columns
translation_footnotes_df['Filename'].to_csv('../translation_footnotes_filenames.csv')
genre_term = "memoir|Memoir"
dataframe = translation_footnotes_df
corpus_name = "Translation Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_translation_memoirs = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'memoir|Memoir': 4 Percent of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'memoir|Memoir': 0.03669724770642202
genre_term = "letter|Letter|lettre|Lettre"
dataframe = translation_footnotes_df
corpus_name = "Translation Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_translation_letters = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'letter|Letter|lettre|Lettre': 11 Percent of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'letter|Letter|lettre|Lettre': 0.10091743119266056
genre_term = "novel|Novel|roman|Roman"
dataframe = translation_footnotes_df
corpus_name = "Translation Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'novel|Novel|roman|Roman': 12 Percent of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'novel|Novel|roman|Roman': 0.11009174311926606
genre_term = "tale|Tale|recit|Recit"
dataframe = translation_footnotes_df
corpus_name = "Translation Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_translation_tale = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'tale|Tale|recit|Recit': 11 Percent of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'tale|Tale|recit|Recit': 0.10091743119266056
genre_term = "romance|Romance"
dataframe = translation_footnotes_df
corpus_name = "Translation Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'romance|Romance': 5 Percent of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'romance|Romance': 0.045871559633027525
genre_term = "history|History|histories|Histories|histoire|Histoire"
dataframe = translation_footnotes_df
corpus_name = "Translation Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'history|History|histories|Histories|histoire|Histoire': 31 Percent of volumes in the Translation Footnotes (One-Footnote Corpus) that contain the words 'history|History|histories|Histories|histoire|Histoire': 0.28440366972477066
# Proportions of Memoirs, Letters, Tales
proportions_translation = [proportion_translation_memoirs, proportion_translation_letters, proportion_translation_tale]
proportions_translation
[0.03669724770642202, 0.10091743119266056, 0.10091743119266056]
# Let's look at the 'IT' tag
intertextual_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'IT']
# Plot the nubmer of IT tagged footnotes in each year
ax = intertextual_footnotes_df.groupby('Date')[['footnotes_present']].sum().plot(kind="bar", figsize=(15, 5), title="Intertextual footnotes in our one-footnote dataset, by year")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Date")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'IT'"])
<matplotlib.legend.Legend at 0x147f96390>
# Plot the nubmer of IT tagged footnotes in each decade
ax = intertextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().plot(kind="bar", figsize=(10, 5), title="Intertextual footnotes in our one-footnote dataset, by decade")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'IT'"])
<matplotlib.legend.Legend at 0x148b68b10>
# Redo axis so it matches scale of contextual
ax = intertextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().plot(kind="bar", figsize=(10, 5), title="Intertextual footnotes in our one-footnote dataset, by decade")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
ax.set_ylim(0, 100)
plt.legend(["Volumes with tag 'IT'"])
<matplotlib.legend.Legend at 0x148792fd0>
# Create a dataframe of all volumes (including those misclassified in one-footnote-dataset)
all_footnotes_by_decade = one_footnote_tagged_new_df.groupby('decade')[['tag']].count().reset_index()
#not_intertextual_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] != 'IT']
#not_intertextual_footnotes_by_decade = not_intertextual_footnotes_df.groupby('decade')[['tag']].count().reset_index()
intertextual_footnotes_by_decade = intertextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().reset_index()
# Merge the two dataframes
merged_IT = pd.merge_ordered(all_footnotes_by_decade,intertextual_footnotes_by_decade, on='decade').fillna(0)
merged_IT['ratio'] = merged_IT['footnotes_present'] / merged_IT['tag']
merged_IT
| decade | tag | footnotes_present | ratio | |
|---|---|---|---|---|
| 0 | 1690 | 1 | 0.0 | 0.000000 |
| 1 | 1700 | 20 | 0.0 | 0.000000 |
| 2 | 1710 | 36 | 2.0 | 0.055556 |
| 3 | 1720 | 54 | 6.0 | 0.111111 |
| 4 | 1730 | 44 | 4.0 | 0.090909 |
| 5 | 1740 | 75 | 17.0 | 0.226667 |
| 6 | 1750 | 121 | 13.0 | 0.107438 |
| 7 | 1760 | 184 | 31.0 | 0.168478 |
| 8 | 1770 | 236 | 30.0 | 0.127119 |
| 9 | 1780 | 255 | 37.0 | 0.145098 |
| 10 | 1790 | 396 | 40.0 | 0.101010 |
| 11 | 1800 | 62 | 9.0 | 0.145161 |
# Create a dataframe of all volumes (including those misclassified in one-footnote-dataset)
all_footnotes_by_date = one_footnote_tagged_new_df.groupby('Date')[['tag']].count().reset_index()
#not_intertextual_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] != 'IT']
#not_intertextual_footnotes_by_decade = not_intertextual_footnotes_df.groupby('decade')[['tag']].count().reset_index()
intertextual_footnotes_by_date = intertextual_footnotes_df.groupby('Date')[['footnotes_present']].sum().reset_index()
# Merge the two dataframes
merged_IT_date = pd.merge_ordered(all_footnotes_by_date,intertextual_footnotes_by_date, on='Date').fillna(0)
merged_IT_date['ratio'] = merged_IT_date['footnotes_present'] / merged_IT_date['tag']
merged_IT_date
| Date | tag | footnotes_present | ratio | |
|---|---|---|---|---|
| 0 | 1697 | 1 | 0.0 | 0.000000 |
| 1 | 1701 | 2 | 0.0 | 0.000000 |
| 2 | 1702 | 3 | 0.0 | 0.000000 |
| 3 | 1703 | 2 | 0.0 | 0.000000 |
| 4 | 1704 | 1 | 0.0 | 0.000000 |
| 5 | 1705 | 5 | 0.0 | 0.000000 |
| 6 | 1706 | 2 | 0.0 | 0.000000 |
| 7 | 1707 | 2 | 0.0 | 0.000000 |
| 8 | 1708 | 1 | 0.0 | 0.000000 |
| 9 | 1709 | 2 | 0.0 | 0.000000 |
| 10 | 1710 | 8 | 1.0 | 0.125000 |
| 11 | 1711 | 3 | 1.0 | 0.333333 |
| 12 | 1712 | 3 | 0.0 | 0.000000 |
| 13 | 1713 | 2 | 0.0 | 0.000000 |
| 14 | 1715 | 6 | 0.0 | 0.000000 |
| 15 | 1716 | 3 | 0.0 | 0.000000 |
| 16 | 1717 | 2 | 0.0 | 0.000000 |
| 17 | 1718 | 3 | 0.0 | 0.000000 |
| 18 | 1719 | 6 | 0.0 | 0.000000 |
| 19 | 1720 | 6 | 2.0 | 0.333333 |
| 20 | 1721 | 5 | 1.0 | 0.200000 |
| 21 | 1722 | 3 | 0.0 | 0.000000 |
| 22 | 1723 | 4 | 0.0 | 0.000000 |
| 23 | 1724 | 4 | 0.0 | 0.000000 |
| 24 | 1725 | 8 | 0.0 | 0.000000 |
| 25 | 1726 | 7 | 1.0 | 0.142857 |
| 26 | 1727 | 7 | 0.0 | 0.000000 |
| 27 | 1728 | 7 | 0.0 | 0.000000 |
| 28 | 1729 | 3 | 2.0 | 0.666667 |
| 29 | 1730 | 3 | 1.0 | 0.333333 |
| 30 | 1731 | 4 | 0.0 | 0.000000 |
| 31 | 1732 | 5 | 0.0 | 0.000000 |
| 32 | 1733 | 4 | 0.0 | 0.000000 |
| 33 | 1734 | 3 | 0.0 | 0.000000 |
| 34 | 1735 | 8 | 0.0 | 0.000000 |
| 35 | 1736 | 6 | 2.0 | 0.333333 |
| 36 | 1737 | 7 | 1.0 | 0.142857 |
| 37 | 1738 | 2 | 0.0 | 0.000000 |
| 38 | 1739 | 2 | 0.0 | 0.000000 |
| 39 | 1740 | 6 | 2.0 | 0.333333 |
| 40 | 1741 | 10 | 2.0 | 0.200000 |
| 41 | 1742 | 12 | 4.0 | 0.333333 |
| 42 | 1743 | 8 | 2.0 | 0.250000 |
| 43 | 1744 | 7 | 1.0 | 0.142857 |
| 44 | 1745 | 2 | 0.0 | 0.000000 |
| 45 | 1746 | 8 | 2.0 | 0.250000 |
| 46 | 1747 | 5 | 1.0 | 0.200000 |
| 47 | 1748 | 5 | 0.0 | 0.000000 |
| 48 | 1749 | 12 | 3.0 | 0.250000 |
| 49 | 1750 | 12 | 0.0 | 0.000000 |
| 50 | 1751 | 13 | 2.0 | 0.153846 |
| 51 | 1752 | 16 | 3.0 | 0.187500 |
| 52 | 1753 | 12 | 0.0 | 0.000000 |
| 53 | 1754 | 13 | 3.0 | 0.230769 |
| 54 | 1755 | 15 | 2.0 | 0.133333 |
| 55 | 1756 | 10 | 3.0 | 0.300000 |
| 56 | 1757 | 12 | 0.0 | 0.000000 |
| 57 | 1758 | 7 | 0.0 | 0.000000 |
| 58 | 1759 | 11 | 0.0 | 0.000000 |
| 59 | 1760 | 17 | 11.0 | 0.647059 |
| 60 | 1761 | 19 | 6.0 | 0.315789 |
| 61 | 1762 | 11 | 3.0 | 0.272727 |
| 62 | 1763 | 10 | 0.0 | 0.000000 |
| 63 | 1764 | 10 | 2.0 | 0.200000 |
| 64 | 1765 | 17 | 1.0 | 0.058824 |
| 65 | 1766 | 22 | 0.0 | 0.000000 |
| 66 | 1767 | 30 | 3.0 | 0.100000 |
| 67 | 1768 | 20 | 1.0 | 0.050000 |
| 68 | 1769 | 28 | 4.0 | 0.142857 |
| 69 | 1770 | 25 | 3.0 | 0.120000 |
| 70 | 1771 | 19 | 2.0 | 0.105263 |
| 71 | 1772 | 27 | 2.0 | 0.074074 |
| 72 | 1773 | 29 | 4.0 | 0.137931 |
| 73 | 1774 | 28 | 2.0 | 0.071429 |
| 74 | 1775 | 29 | 8.0 | 0.275862 |
| 75 | 1776 | 26 | 4.0 | 0.153846 |
| 76 | 1777 | 10 | 0.0 | 0.000000 |
| 77 | 1778 | 20 | 1.0 | 0.050000 |
| 78 | 1779 | 23 | 4.0 | 0.173913 |
| 79 | 1780 | 23 | 4.0 | 0.173913 |
| 80 | 1781 | 13 | 3.0 | 0.230769 |
| 81 | 1782 | 17 | 4.0 | 0.235294 |
| 82 | 1783 | 35 | 5.0 | 0.142857 |
| 83 | 1784 | 27 | 4.0 | 0.148148 |
| 84 | 1785 | 26 | 7.0 | 0.269231 |
| 85 | 1786 | 25 | 4.0 | 0.160000 |
| 86 | 1787 | 23 | 3.0 | 0.130435 |
| 87 | 1788 | 34 | 1.0 | 0.029412 |
| 88 | 1789 | 32 | 2.0 | 0.062500 |
| 89 | 1790 | 37 | 2.0 | 0.054054 |
| 90 | 1791 | 37 | 4.0 | 0.108108 |
| 91 | 1792 | 24 | 2.0 | 0.083333 |
| 92 | 1793 | 44 | 3.0 | 0.068182 |
| 93 | 1794 | 38 | 4.0 | 0.105263 |
| 94 | 1795 | 42 | 6.0 | 0.142857 |
| 95 | 1796 | 51 | 6.0 | 0.117647 |
| 96 | 1797 | 41 | 4.0 | 0.097561 |
| 97 | 1798 | 35 | 5.0 | 0.142857 |
| 98 | 1799 | 47 | 4.0 | 0.085106 |
| 99 | 1800 | 62 | 9.0 | 0.145161 |
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_IT['tag'], merged_IT['footnotes_present'])
#print Pearson rank correlation and p-value
print("Correlation between NUMBER OF VOLUMES and NUMBER OF INTERTEXUAL FOOTNOTES (by decade)")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between NUMBER OF VOLUMES and NUMBER OF INTERTEXUAL FOOTNOTES (by decade) Pearson rank correlation: 0.947371250472528 p-value: 2.909949895016468e-06
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_IT_date['tag'], merged_IT_date['footnotes_present'])
#print Pearson rank correlation and p-value
print("Correlation between NUMBER OF VOLUMES and NUMBER OF INTERTEXUAL FOOTNOTES (by year)")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between NUMBER OF VOLUMES and NUMBER OF INTERTEXUAL FOOTNOTES (by year) Pearson rank correlation: 0.7144346564218209 p-value: 7.05796926816569e-17
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
print("Correlation between DECADE and PROPORTION OF INTERTEXTUAL FOOTNOTES")
rho, p = pearsonr(merged_IT['decade'], merged_IT['ratio'])
#print Pearson rank correlation and p-value
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between DECADE and PROPORTION OF INTERTEXTUAL FOOTNOTES Pearson rank correlation: 0.6396944201166889 p-value: 0.02508382719579203
xtrain = merged_IT['decade']
ytrain = merged_IT['ratio']
model = sm.Logit(ytrain, xtrain).fit()
model.summary()
Optimization terminated successfully.
Current function value: 0.177513
Iterations 5
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:4465: RuntimeWarning: divide by zero encountered in scalar divide return 1 - self.llf/self.llnull
| Dep. Variable: | ratio | No. Observations: | 12 |
|---|---|---|---|
| Model: | Logit | Df Residuals: | 11 |
| Method: | MLE | Df Model: | 0 |
| Date: | Tue, 18 Mar 2025 | Pseudo R-squ.: | inf |
| Time: | 16:55:50 | Log-Likelihood: | -2.1302 |
| converged: | True | LL-Null: | 0.0000 |
| Covariance Type: | nonrobust | LLR p-value: | nan |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| decade | -0.0012 | 0.001 | -2.271 | 0.023 | -0.002 | -0.000 |
# Figure out what proportion of the larger population our subset is
proportion = merged_IT['tag'].sum() / merged_IT['footnotes_present'].sum()
proportion
7.851851851851852
# Let's run a chi2 goodness of fit test
#stats.chisquare(f_obs=merged_IT['tag_y'], f_exp=merged_IT['tag_x']/proportion)
#stats.ttest_ind(merged_IT['tag_y'],merged_IT['tag_x'])
fig, ax = plt.subplots(1,1, figsize = (8,6))
label = merged_IT["decade"]
x = np.arange(len(label))
rect2 = ax.bar(x,
merged_IT["tag"],
label = "All volumes",
edgecolor = "black")
rect2 = ax.bar(x,
merged_IT["footnotes_present"],
label = "Intertextual footnotes in volumes",
edgecolor = "black")
#add the labels to the axis
ax.set_ylabel("Number of volumes",
fontsize = 15,
labelpad = 20)
ax.set_xlabel("Decade",
fontsize = 15,
labelpad =20)
ax.set_title("Intertextual footnotes in one-footnote dataset vs all volumes in one-footnote dataset, by decade",
fontsize = 15,
pad = 20)
#set the ticks
ax.set_xticks(x)
ax.set_xticklabels(label)#add the legend
#using the labels of the bars
ax.legend(#title = "Legend",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax.tick_params(axis = "x",
which = "both",
labelrotation = 90)
ax.tick_params(axis = "y",
which = "both",
labelsize = 10)
fig, (ax1, ax2) = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]}, figsize = (8,8))
label = merged_IT["decade"]
x = np.arange(len(label))
rect2 = ax1.bar(x,
merged_IT["tag"],
label = "All volumes",
edgecolor = "black")
rect2 = ax1.bar(x,
merged_IT["footnotes_present"],
label = "Intertextual footnotes in volumes",
edgecolor = "black")
rect3 = ax2.plot(x,
merged_IT["footnotes_present"]/merged_IT["tag"],
label = "Proportion of Intertextual Footnotes",
marker="o",
color="black",
markeredgecolor="black")
#add the labels to the axis
ax1.set_ylabel("Number of volumes",
fontsize = 15,
labelpad = 20)
ax2.set_xlabel("Decade",
fontsize = 15,
labelpad =20)
ax1.set_title("Intertextual footnotes in one-footnote dataset vs all volumes in one-footnote dataset, by decade",
fontsize = 15,
pad = 10)
#set the ticks
ax1.set_xticks(x)
ax1.set_xticklabels(label)#add the legend
#using the labels of the bars
ax1.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax1.tick_params(axis = "x",
which = "both",
labelrotation = 0)
ax1.tick_params(axis = "y",
which = "both",
labelsize = 10)
###
#add the labels to the axis
#set the ticks
ax2.set_xticks(x)
ax2.set_xticklabels(label)#add the legend
ax2.set_ylim(0, 0.4)
#using the labels of the bars
ax2.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax2.tick_params(axis = "x",
which = "both",
labelrotation = 0)
# Fitting a Linear Regression model to our data (binning by decade)
linear = LinearRegression()
linear.fit(merged_IT[['decade']], merged_IT['ratio'])
# Creating a scatterplot of our data (in blue)
ax = merged_IT.plot.scatter(x='decade', y='ratio')
# Adding our plotted linear regression model (in orange)
ax.plot(merged_IT['decade'], linear.predict(merged_IT[['decade']]), c='orange')
linear.score(merged_IT[['decade']], merged_IT['ratio'])
0.40920895112842604
#define response variable
y = merged_IT['ratio']
#define predictor variables
x = merged_IT[['decade']]
#add constant to predictor variables
x = sm.add_constant(x)
#fit linear regression model
model = sm.OLS(y, x).fit()
#view model summary
print(model.summary())
OLS Regression Results
==============================================================================
Dep. Variable: ratio R-squared: 0.409
Model: OLS Adj. R-squared: 0.350
Method: Least Squares F-statistic: 6.926
Date: Tue, 18 Mar 2025 Prob (F-statistic): 0.0251
Time: 16:58:25 Log-Likelihood: 19.352
No. Observations: 12 AIC: -34.70
Df Residuals: 10 BIC: -33.73
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const -1.9229 0.771 -2.493 0.032 -3.641 -0.204
decade 0.0012 0.000 2.632 0.025 0.000 0.002
==============================================================================
Omnibus: 8.263 Durbin-Watson: 1.683
Prob(Omnibus): 0.016 Jarque-Bera (JB): 3.955
Skew: 1.243 Prob(JB): 0.138
Kurtosis: 4.314 Cond. No. 8.82e+04
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.82e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/scipy/stats/_stats_py.py:1736: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=12
warnings.warn("kurtosistest only valid for n>=20 ... continuing "
#o = rb.beast( merged_IT['ratio'], start=1690, deltat= 10, season='none') # season='none' bcz the data has no seasonal/periodic component
#rb.plot(o, title='Proportion of intertextual footnotes per decade')
#rb.print(o)
#o = rb.beast( merged_IT_date['ratio'], start=1690, season='none', period=1.0) # season='none' bcz the data has no seasonal/periodic component
#rb.plot(o, title='Proportion of intertextual footnotes per year')
#rb.print(o)
x= list(merged_IT['decade'])
y= list(merged_IT['ratio'])
pw_fit = piecewise_regression.Fit(x, y, n_breakpoints=2)
pw_fit.summary()
Breakpoint Regression Results
====================================================================================================
No. Observations 12
No. Model Parameters 6
Degrees of Freedom 6
Res. Sum of Squares 0.0133366
Total Sum of Squares 0.0472661
R Squared 0.717841
Adjusted R Squared 0.379249
Converged: True
====================================================================================================
====================================================================================================
Estimate Std Err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------------------------
const -1.91306e-14 11.3 -1.6928e-15 1.0 -27.654 27.654
alpha1 1.13068e-17 0.00667 1.6958e-15 1.0 -0.016315 0.016315
beta1 0.00555556 0.00943 0.58918 - -0.017517 0.028628
beta2 -0.00576615 0.00671 -0.85971 - -0.022178 0.010645
breakpoint1 1700.0 20.8 - - 1649.1 1750.9
breakpoint2 1726.48 15.5 - - 1688.4 1764.5
----------------------------------------------------------------------------------------------------
These alphas(gradients of segments) are estimatedfrom betas(change in gradient)
----------------------------------------------------------------------------------------------------
alpha2 0.00555556 0.00667 0.83323 0.437 -0.010759 0.02187
alpha3 -0.000210592 0.000727 -0.28948 0.782 -0.0019907 0.0015695
====================================================================================================
Davies test for existence of at least 1 breakpoint: p=0.00285752 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)
'\n Breakpoint Regression Results \n====================================================================================================\nNo. Observations 12\nNo. Model Parameters 6\nDegrees of Freedom 6\nRes. Sum of Squares 0.0133366\nTotal Sum of Squares 0.0472661\nR Squared 0.717841\nAdjusted R Squared 0.379249\nConverged: True\n====================================================================================================\n====================================================================================================\n Estimate Std Err t P>|t| [0.025 0.975]\n----------------------------------------------------------------------------------------------------\nconst -1.91306e-14 11.3 -1.6928e-15 1.0 -27.654 27.654\nalpha1 1.13068e-17 0.00667 1.6958e-15 1.0 -0.016315 0.016315\nbeta1 0.00555556 0.00943 0.58918 - -0.017517 0.028628\nbeta2 -0.00576615 0.00671 -0.85971 - -0.022178 0.010645\nbreakpoint1 1700.0 20.8 - - 1649.1 1750.9\nbreakpoint2 1726.48 15.5 - - 1688.4 1764.5\n----------------------------------------------------------------------------------------------------\nThese alphas(gradients of segments) are estimatedfrom betas(change in gradient)\n----------------------------------------------------------------------------------------------------\nalpha2 0.00555556 0.00667 0.83323 0.437 -0.010759 0.02187\nalpha3 -0.000210592 0.000727 -0.28948 0.782 -0.0019907 0.0015695\n====================================================================================================\nDavies test for existence of at least 1 breakpoint: p=0.00285752 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)\n\n'
# Plot the data, fit, breakpoints and confidence intervals
plt.subplots(figsize=(6, 2))
pw_fit.plot_data(color="grey", s=20)
# Pass in standard matplotlib keywords to control any of the plots
pw_fit.plot_fit(color="red", linestyle='dashed', linewidth=2)
pw_fit.plot_breakpoints()
pw_fit.plot_breakpoint_confidence_intervals()
plt.xlabel("Decade")
plt.ylabel("Proportion")
plt.ylim(0, 0.4)
plt.title("Proportion of Intertextual Footnotes in One-Footnote Corpus (with Model)")
plt.show()
plt.close()
x= list(merged_IT['decade'][1:])
y= list(merged_IT['ratio'][1:])
pw_fit = piecewise_regression.Fit(x, y, n_breakpoints=1)
pw_fit.summary()
Breakpoint Regression Results
====================================================================================================
No. Observations 11
No. Model Parameters 4
Degrees of Freedom 7
Res. Sum of Squares 0.0133366
Total Sum of Squares 0.0348822
R Squared 0.617668
Adjusted R Squared 0.362779
Converged: True
====================================================================================================
====================================================================================================
Estimate Std Err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------------------------
const -9.44444 5.28 -1.7894 0.117 -21.925 3.0358
alpha1 0.00555556 0.00309 1.8 0.115 -0.0017427 0.012854
beta1 -0.00576615 0.00316 -1.8253 - -0.013236 0.0017039
breakpoint1 1726.48 11.1 - - 1700.1 1752.8
----------------------------------------------------------------------------------------------------
These alphas(gradients of segments) are estimatedfrom betas(change in gradient)
----------------------------------------------------------------------------------------------------
alpha2 -0.000210592 0.000674 -0.31267 0.764 -0.0018032 0.001382
====================================================================================================
Davies test for existence of at least 1 breakpoint: p=0.00240111 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)
'\n Breakpoint Regression Results \n====================================================================================================\nNo. Observations 11\nNo. Model Parameters 4\nDegrees of Freedom 7\nRes. Sum of Squares 0.0133366\nTotal Sum of Squares 0.0348822\nR Squared 0.617668\nAdjusted R Squared 0.362779\nConverged: True\n====================================================================================================\n====================================================================================================\n Estimate Std Err t P>|t| [0.025 0.975]\n----------------------------------------------------------------------------------------------------\nconst -9.44444 5.28 -1.7894 0.117 -21.925 3.0358\nalpha1 0.00555556 0.00309 1.8 0.115 -0.0017427 0.012854\nbeta1 -0.00576615 0.00316 -1.8253 - -0.013236 0.0017039\nbreakpoint1 1726.48 11.1 - - 1700.1 1752.8\n----------------------------------------------------------------------------------------------------\nThese alphas(gradients of segments) are estimatedfrom betas(change in gradient)\n----------------------------------------------------------------------------------------------------\nalpha2 -0.000210592 0.000674 -0.31267 0.764 -0.0018032 0.001382\n====================================================================================================\nDavies test for existence of at least 1 breakpoint: p=0.00240111 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)\n\n'
# Plot the data, fit, breakpoints and confidence intervals
plt.subplots(figsize=(6, 2))
pw_fit.plot_data(color="grey", s=20)
# Pass in standard matplotlib keywords to control any of the plots
pw_fit.plot_fit(color="red", linestyle='dashed', linewidth=2)
pw_fit.plot_breakpoints()
pw_fit.plot_breakpoint_confidence_intervals()
plt.xlabel("Decade")
plt.ylabel("Proportion")
plt.ylim(0, 0.4)
plt.title("Proportion of Intertextual Footnotes in One-Footnote Corpus (trend fitted)")
plt.show()
plt.close()
print('Number of volumes with intertextual footnotes in our one-footnote dataset:')
intertextual_footnotes_df['Title'].count()
Number of volumes with intertextual footnotes in our one-footnote dataset:
185
print('Titles of volumes with intertextual footnotes (in our one-footnote dataset):')
intertextual_footnotes_df[['Title','Vol_Number']]
Titles of volumes with intertextual footnotes (in our one-footnote dataset):
| Title | Vol_Number | |
|---|---|---|
| 0 | Prince Arthur: an allegorical romance. The sto... | Volume 1 |
| 11 | Les delices du sentiment; or the passionate lo... | 0 |
| 19 | Pigmalion, ou la statue anim<c3><a9>e. | 0 |
| 25 | The confessions of James Baptiste Couteau, cit... | Volume 2 |
| 30 | The farmer's son of Kent. A tale. In two volum... | Volume 2 |
| ... | ... | ... |
| 1438 | Letters of the late Lord Lyttleton. In two vol... | Volume 1 |
| 1458 | Les confessions d'une courtisane devenue philo... | 0 |
| 1462 | The trial of a student at the College of Cluth... | 0 |
| 1466 | The history of Tom Jones, a foundling. By Henr... | Volume 2 |
| 1477 | Relation d'une insigne imposture litt<c3><a9>r... | 0 |
185 rows × 2 columns
intertextual_footnotes_df['Filename'].to_csv('../intertextual_footnotes_filenames.csv')
genre_term = "memoir|Memoir"
dataframe = intertextual_footnotes_df
corpus_name = "Intertextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_intertextual_memoirs = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'memoir|Memoir': 8 Percent of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'memoir|Memoir': 0.043243243243243246
genre_term = "letter|Letter|lettre|Lettre"
dataframe = intertextual_footnotes_df
corpus_name = "Intertextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_intertextual_letters = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'letter|Letter|lettre|Lettre': 29 Percent of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'letter|Letter|lettre|Lettre': 0.15675675675675677
genre_term = "novel|Novel|roman|Roman"
dataframe = intertextual_footnotes_df
corpus_name = "Intertextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'novel|Novel|roman|Roman': 11 Percent of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'novel|Novel|roman|Roman': 0.05945945945945946
genre_term = "tale|Tale|recit|Recit"
dataframe = intertextual_footnotes_df
corpus_name = "Intertextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_intertextual_tale = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'tale|Tale|recit|Recit': 15 Percent of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'tale|Tale|recit|Recit': 0.08108108108108109
genre_term = "romance|Romance"
dataframe = intertextual_footnotes_df
corpus_name = "Intertextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'romance|Romance': 4 Percent of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'romance|Romance': 0.021621621621621623
genre_term = "history|History|histories|Histories|histoire|Histoire"
dataframe = intertextual_footnotes_df
corpus_name = "Intertextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'history|History|histories|Histories|histoire|Histoire': 29 Percent of volumes in the Intertextual Footnotes (One-Footnote Corpus) that contain the words 'history|History|histories|Histories|histoire|Histoire': 0.15675675675675677
# Proportions of Memoirs, Letters, Tales
proportions_intertextual = [proportion_intertextual_memoirs, proportion_intertextual_letters, proportion_intertextual_tale]
proportions_intertextual
[0.043243243243243246, 0.15675675675675677, 0.08108108108108109]
# Let's look at the 'IN' tag
intratextual_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'IN']
# Plot the nubmer of IT tagged footnotes in each year
ax = intratextual_footnotes_df.groupby('Date')[['footnotes_present']].sum().plot(kind="bar", figsize=(15, 5), title="Intratextual footnotes in our one-footnote dataset, by year")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Date")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'IN'"])
<matplotlib.legend.Legend at 0x147af9590>
# Plot the nubmer of IN tagged footnotes in each decade
ax = intratextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().plot(kind="bar", figsize=(10, 5), title="Intratextual footnotes in our one-footnote dataset, by decade")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'IN'"])
<matplotlib.legend.Legend at 0x147ab4c90>
ax = intratextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().plot(kind="bar", figsize=(10, 5), title="Intratextual footnotes in our one-footnote dataset, by decade")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
ax.set_ylim(0,100)
plt.legend(["Volumes with tag 'IN'"])
<matplotlib.legend.Legend at 0x147b77cd0>
# Create a dataframe of all volumes (including those misclassified in one-footnote-dataset)
all_footnotes_by_decade = one_footnote_tagged_new_df.groupby('decade')[['tag']].count().reset_index()
#not_intratextual_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] != 'IN']
#not_intratextual_footnotes_by_decade = not_intratextual_footnotes_df.groupby('decade')[['tag']].count().reset_index()
intratextual_footnotes_by_decade = intratextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().reset_index()
# Merge the two dataframes
merged_IN = pd.merge_ordered(all_footnotes_by_decade,intratextual_footnotes_by_decade, on='decade').fillna(0)
merged_IN['ratio'] = merged_IN['footnotes_present'] / merged_IN['tag']
merged_IN
| decade | tag | footnotes_present | ratio | |
|---|---|---|---|---|
| 0 | 1690 | 1 | 0.0 | 0.000000 |
| 1 | 1700 | 20 | 0.0 | 0.000000 |
| 2 | 1710 | 36 | 1.0 | 0.027778 |
| 3 | 1720 | 54 | 3.0 | 0.055556 |
| 4 | 1730 | 44 | 1.0 | 0.022727 |
| 5 | 1740 | 75 | 2.0 | 0.026667 |
| 6 | 1750 | 121 | 6.0 | 0.049587 |
| 7 | 1760 | 184 | 18.0 | 0.097826 |
| 8 | 1770 | 236 | 30.0 | 0.127119 |
| 9 | 1780 | 255 | 24.0 | 0.094118 |
| 10 | 1790 | 396 | 24.0 | 0.060606 |
| 11 | 1800 | 62 | 1.0 | 0.016129 |
# Create a dataframe of all volumes (including those misclassified in one-footnote-dataset)
all_footnotes_by_date = one_footnote_tagged_new_df.groupby('Date')[['tag']].count().reset_index()
#not_intertextual_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] != 'IT']
#not_intertextual_footnotes_by_decade = not_intertextual_footnotes_df.groupby('decade')[['tag']].count().reset_index()
intratextual_footnotes_by_date = intratextual_footnotes_df.groupby('Date')[['footnotes_present']].sum().reset_index()
# Merge the two dataframes
merged_IN_date = pd.merge_ordered(all_footnotes_by_date,intratextual_footnotes_by_date, on='Date').fillna(0)
merged_IN_date['ratio'] = merged_IN_date['footnotes_present'] / merged_IN_date['tag']
merged_IN_date
| Date | tag | footnotes_present | ratio | |
|---|---|---|---|---|
| 0 | 1697 | 1 | 0.0 | 0.000000 |
| 1 | 1701 | 2 | 0.0 | 0.000000 |
| 2 | 1702 | 3 | 0.0 | 0.000000 |
| 3 | 1703 | 2 | 0.0 | 0.000000 |
| 4 | 1704 | 1 | 0.0 | 0.000000 |
| 5 | 1705 | 5 | 0.0 | 0.000000 |
| 6 | 1706 | 2 | 0.0 | 0.000000 |
| 7 | 1707 | 2 | 0.0 | 0.000000 |
| 8 | 1708 | 1 | 0.0 | 0.000000 |
| 9 | 1709 | 2 | 0.0 | 0.000000 |
| 10 | 1710 | 8 | 1.0 | 0.125000 |
| 11 | 1711 | 3 | 0.0 | 0.000000 |
| 12 | 1712 | 3 | 0.0 | 0.000000 |
| 13 | 1713 | 2 | 0.0 | 0.000000 |
| 14 | 1715 | 6 | 0.0 | 0.000000 |
| 15 | 1716 | 3 | 0.0 | 0.000000 |
| 16 | 1717 | 2 | 0.0 | 0.000000 |
| 17 | 1718 | 3 | 0.0 | 0.000000 |
| 18 | 1719 | 6 | 0.0 | 0.000000 |
| 19 | 1720 | 6 | 1.0 | 0.166667 |
| 20 | 1721 | 5 | 0.0 | 0.000000 |
| 21 | 1722 | 3 | 0.0 | 0.000000 |
| 22 | 1723 | 4 | 0.0 | 0.000000 |
| 23 | 1724 | 4 | 0.0 | 0.000000 |
| 24 | 1725 | 8 | 1.0 | 0.125000 |
| 25 | 1726 | 7 | 1.0 | 0.142857 |
| 26 | 1727 | 7 | 0.0 | 0.000000 |
| 27 | 1728 | 7 | 0.0 | 0.000000 |
| 28 | 1729 | 3 | 0.0 | 0.000000 |
| 29 | 1730 | 3 | 0.0 | 0.000000 |
| 30 | 1731 | 4 | 0.0 | 0.000000 |
| 31 | 1732 | 5 | 0.0 | 0.000000 |
| 32 | 1733 | 4 | 0.0 | 0.000000 |
| 33 | 1734 | 3 | 0.0 | 0.000000 |
| 34 | 1735 | 8 | 1.0 | 0.125000 |
| 35 | 1736 | 6 | 0.0 | 0.000000 |
| 36 | 1737 | 7 | 0.0 | 0.000000 |
| 37 | 1738 | 2 | 0.0 | 0.000000 |
| 38 | 1739 | 2 | 0.0 | 0.000000 |
| 39 | 1740 | 6 | 0.0 | 0.000000 |
| 40 | 1741 | 10 | 0.0 | 0.000000 |
| 41 | 1742 | 12 | 0.0 | 0.000000 |
| 42 | 1743 | 8 | 0.0 | 0.000000 |
| 43 | 1744 | 7 | 0.0 | 0.000000 |
| 44 | 1745 | 2 | 0.0 | 0.000000 |
| 45 | 1746 | 8 | 0.0 | 0.000000 |
| 46 | 1747 | 5 | 0.0 | 0.000000 |
| 47 | 1748 | 5 | 0.0 | 0.000000 |
| 48 | 1749 | 12 | 2.0 | 0.166667 |
| 49 | 1750 | 12 | 1.0 | 0.083333 |
| 50 | 1751 | 13 | 0.0 | 0.000000 |
| 51 | 1752 | 16 | 2.0 | 0.125000 |
| 52 | 1753 | 12 | 1.0 | 0.083333 |
| 53 | 1754 | 13 | 0.0 | 0.000000 |
| 54 | 1755 | 15 | 0.0 | 0.000000 |
| 55 | 1756 | 10 | 0.0 | 0.000000 |
| 56 | 1757 | 12 | 1.0 | 0.083333 |
| 57 | 1758 | 7 | 0.0 | 0.000000 |
| 58 | 1759 | 11 | 1.0 | 0.090909 |
| 59 | 1760 | 17 | 1.0 | 0.058824 |
| 60 | 1761 | 19 | 2.0 | 0.105263 |
| 61 | 1762 | 11 | 0.0 | 0.000000 |
| 62 | 1763 | 10 | 2.0 | 0.200000 |
| 63 | 1764 | 10 | 1.0 | 0.100000 |
| 64 | 1765 | 17 | 7.0 | 0.411765 |
| 65 | 1766 | 22 | 1.0 | 0.045455 |
| 66 | 1767 | 30 | 2.0 | 0.066667 |
| 67 | 1768 | 20 | 0.0 | 0.000000 |
| 68 | 1769 | 28 | 2.0 | 0.071429 |
| 69 | 1770 | 25 | 1.0 | 0.040000 |
| 70 | 1771 | 19 | 2.0 | 0.105263 |
| 71 | 1772 | 27 | 4.0 | 0.148148 |
| 72 | 1773 | 29 | 3.0 | 0.103448 |
| 73 | 1774 | 28 | 3.0 | 0.107143 |
| 74 | 1775 | 29 | 7.0 | 0.241379 |
| 75 | 1776 | 26 | 6.0 | 0.230769 |
| 76 | 1777 | 10 | 0.0 | 0.000000 |
| 77 | 1778 | 20 | 4.0 | 0.200000 |
| 78 | 1779 | 23 | 0.0 | 0.000000 |
| 79 | 1780 | 23 | 2.0 | 0.086957 |
| 80 | 1781 | 13 | 2.0 | 0.153846 |
| 81 | 1782 | 17 | 0.0 | 0.000000 |
| 82 | 1783 | 35 | 6.0 | 0.171429 |
| 83 | 1784 | 27 | 4.0 | 0.148148 |
| 84 | 1785 | 26 | 3.0 | 0.115385 |
| 85 | 1786 | 25 | 0.0 | 0.000000 |
| 86 | 1787 | 23 | 3.0 | 0.130435 |
| 87 | 1788 | 34 | 0.0 | 0.000000 |
| 88 | 1789 | 32 | 4.0 | 0.125000 |
| 89 | 1790 | 37 | 3.0 | 0.081081 |
| 90 | 1791 | 37 | 1.0 | 0.027027 |
| 91 | 1792 | 24 | 4.0 | 0.166667 |
| 92 | 1793 | 44 | 3.0 | 0.068182 |
| 93 | 1794 | 38 | 2.0 | 0.052632 |
| 94 | 1795 | 42 | 4.0 | 0.095238 |
| 95 | 1796 | 51 | 1.0 | 0.019608 |
| 96 | 1797 | 41 | 0.0 | 0.000000 |
| 97 | 1798 | 35 | 3.0 | 0.085714 |
| 98 | 1799 | 47 | 3.0 | 0.063830 |
| 99 | 1800 | 62 | 1.0 | 0.016129 |
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_IN['tag'], merged_IN['footnotes_present'])
#print Pearson rank correlation and p-value
print("Correlation between NUMBER OF VOLUMES and NUMBER OF INTRATEXUAL FOOTNOTES (by decade)")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between NUMBER OF VOLUMES and NUMBER OF INTRATEXUAL FOOTNOTES (by decade) Pearson rank correlation: 0.9060555629281141 p-value: 4.9131974916967076e-05
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_IN_date['tag'], merged_IN_date['footnotes_present'])
#print Pearson rank correlation and p-value
print("Correlation between NUMBER OF VOLUMES and NUMBER OF INTRATEXUAL FOOTNOTES (by year)")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between NUMBER OF VOLUMES and NUMBER OF INTRATEXUAL FOOTNOTES (by year) Pearson rank correlation: 0.5810292125489733 p-value: 2.333096080802051e-10
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_IN['decade'], merged_IN['ratio'])
#print Pearson rank correlation and p-value
print("Correlation between DECADE and PROPORTION OF OF INTRATEXUAL FOOTNOTES")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between DECADE and PROPORTION OF OF INTRATEXUAL FOOTNOTES Pearson rank correlation: 0.5798297681200253 p-value: 0.048137206681146995
# Figure out what proportion of the larger population our subset is
proportion = merged_IN['tag'].sum() / merged_IN['footnotes_present'].sum()
proportion
13.49090909090909
# Let's run a chi2 goodness of fit test
#stats.chisquare(f_obs=merged_IN['tag_y'], f_exp=merged_IN['tag_x']/proportion)
xtrain = merged_IN['decade']
ytrain = merged_IN['ratio']
model = sm.Logit(ytrain, xtrain).fit()
model.summary()
Optimization terminated successfully.
Current function value: 0.067317
Iterations 6
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:4465: RuntimeWarning: divide by zero encountered in scalar divide return 1 - self.llf/self.llnull
| Dep. Variable: | ratio | No. Observations: | 12 |
|---|---|---|---|
| Model: | Logit | Df Residuals: | 11 |
| Method: | MLE | Df Model: | 0 |
| Date: | Tue, 18 Mar 2025 | Pseudo R-squ.: | inf |
| Time: | 17:06:04 | Log-Likelihood: | -0.80781 |
| converged: | True | LL-Null: | 0.0000 |
| Covariance Type: | nonrobust | LLR p-value: | nan |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| decade | -0.0017 | 0.001 | -2.215 | 0.027 | -0.003 | -0.000 |
fig, ax = plt.subplots(1,1, figsize = (8,6))
label = merged_IN["decade"]
x = np.arange(len(label))
rect2 = ax.bar(x,
merged_IN["tag"],
label = "All volumes",
edgecolor = "black")
rect2 = ax.bar(x,
merged_IN["footnotes_present"],
label = "Intratextual footnotes in volumes",
edgecolor = "black")
#add the labels to the axis
ax.set_ylabel("Number of volumes",
fontsize = 15,
labelpad = 20)
ax.set_xlabel("Decade",
fontsize = 15,
labelpad =20)
ax.set_title("Intratextual footnotes in one-footnote dataset vs all volumes in one-footnote dataset, by decade",
fontsize = 15,
pad = 20)
#set the ticks
ax.set_xticks(x)
ax.set_xticklabels(label)#add the legend
#using the labels of the bars
ax.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax.tick_params(axis = "x",
which = "both",
labelrotation = 90)
ax.tick_params(axis = "y",
which = "both",
labelsize = 10)
fig, (ax1, ax2) = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]}, figsize = (8,8))
label = merged_IN["decade"]
x = np.arange(len(label))
rect2 = ax1.bar(x,
merged_IN["tag"],
label = "All volumes",
edgecolor = "black")
rect2 = ax1.bar(x,
merged_IN["footnotes_present"],
label = "Intratextual footnotes in volumes",
edgecolor = "black")
rect3 = ax2.plot(x,
merged_IN["footnotes_present"]/merged_IN["tag"],
label = "Proportion of Intratextual Footnotes",
marker="o",
color="black",
markeredgecolor="black")
#add the labels to the axis
ax1.set_ylabel("Number of volumes",
fontsize = 15,
labelpad = 20)
ax2.set_xlabel("Decade",
fontsize = 15,
labelpad =20)
ax1.set_title("Intratextual footnotes in one-footnote dataset vs all volumes in one-footnote dataset, by decade",
fontsize = 15,
pad = 10)
#set the ticks
ax1.set_xticks(x)
ax1.set_xticklabels(label)#add the legend
#using the labels of the bars
ax1.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax1.tick_params(axis = "x",
which = "both",
labelrotation = 0)
ax1.tick_params(axis = "y",
which = "both",
labelsize = 10)
###
#add the labels to the axis
#set the ticks
ax2.set_xticks(x)
ax2.set_xticklabels(label)#add the legend
ax2.set_ylim(0, 0.4)
#using the labels of the bars
ax2.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax2.tick_params(axis = "x",
which = "both",
labelrotation = 0)
# Fitting a Linear Regression model to our data (binning by decade)
linear = LinearRegression()
linear.fit(merged_IN[['decade']], merged_IN['ratio'])
# Creating a scatterplot of our data (in blue)
ax = merged_IN.plot.scatter(x='decade', y='ratio')
# Adding our plotted linear regression model (in orange)
ax.plot(merged_IN['decade'], linear.predict(merged_IN[['decade']]), linestyle='dashed', c='orange')
linear.score(merged_IN[['decade']], merged_IN['ratio'])
0.308673706576428
#o = rb.beast( merged_IN['ratio'], start=1690, deltat= 10, season='none') # season='none' bcz the data has no seasonal/periodic component
#rb.plot(o, title='Proportion of intratextual footnotes per decade')
#rb.print(o)
#o = rb.beast( merged_IN_date['ratio'], start=1690, season='none', period=1.0) # season='none' bcz the data has no seasonal/periodic component
#rb.plot(o, title='Proportion of intratextual footnotes per year')
#rb.print(o)
x= list(merged_IN['decade'])
y= list(merged_IN['ratio'])
pw_fit = piecewise_regression.Fit(x, y, n_breakpoints=1)
pw_fit.summary()
Breakpoint Regression Results
====================================================================================================
No. Observations 12
No. Model Parameters 4
Degrees of Freedom 8
Res. Sum of Squares 0.00374863
Total Sum of Squares 0.0182139
R Squared 0.794188
Adjusted R Squared 0.676582
Converged: True
====================================================================================================
====================================================================================================
Estimate Std Err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------------------------
const -2.30952 0.484 -4.7765 0.0014 -3.4245 -1.1945
alpha1 0.00136114 0.000279 4.8706 0.00124 0.00071671 0.0020056
beta1 -0.00526057 0.00156 -3.3809 - -0.0088486 -0.0016725
breakpoint1 1776.7 5.35 - - 1764.4 1789.0
----------------------------------------------------------------------------------------------------
These alphas(gradients of segments) are estimatedfrom betas(change in gradient)
----------------------------------------------------------------------------------------------------
alpha2 -0.00389943 0.00153 -2.5476 0.0343 -0.0074291 -0.00036974
====================================================================================================
Davies test for existence of at least 1 breakpoint: p=5.43646e-07 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)
'\n Breakpoint Regression Results \n====================================================================================================\nNo. Observations 12\nNo. Model Parameters 4\nDegrees of Freedom 8\nRes. Sum of Squares 0.00374863\nTotal Sum of Squares 0.0182139\nR Squared 0.794188\nAdjusted R Squared 0.676582\nConverged: True\n====================================================================================================\n====================================================================================================\n Estimate Std Err t P>|t| [0.025 0.975]\n----------------------------------------------------------------------------------------------------\nconst -2.30952 0.484 -4.7765 0.0014 -3.4245 -1.1945\nalpha1 0.00136114 0.000279 4.8706 0.00124 0.00071671 0.0020056\nbeta1 -0.00526057 0.00156 -3.3809 - -0.0088486 -0.0016725\nbreakpoint1 1776.7 5.35 - - 1764.4 1789.0\n----------------------------------------------------------------------------------------------------\nThese alphas(gradients of segments) are estimatedfrom betas(change in gradient)\n----------------------------------------------------------------------------------------------------\nalpha2 -0.00389943 0.00153 -2.5476 0.0343 -0.0074291 -0.00036974\n====================================================================================================\nDavies test for existence of at least 1 breakpoint: p=5.43646e-07 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)\n\n'
# Plot the data, fit, breakpoints and confidence intervals
plt.subplots(figsize=(6, 2))
pw_fit.plot_data(color="grey", s=20)
# Pass in standard matplotlib keywords to control any of the plots
pw_fit.plot_fit(color="red", linestyle='dashed', linewidth=2)
pw_fit.plot_breakpoints()
pw_fit.plot_breakpoint_confidence_intervals()
plt.xlabel("Decade")
plt.ylabel("Proportion")
plt.ylim(0, 0.4)
plt.title("Proportion of Intratextual Footnotes in One-Footnote Corpus (trend fitted)")
plt.show()
plt.close()
print('Number of volumes with intratextual footnotes in our one-footnote dataset:')
intratextual_footnotes_df['Title'].count()
Number of volumes with intratextual footnotes in our one-footnote dataset:
116
print('Titles of volumes with intratextual footnotes (in our one-footnote dataset):')
intratextual_footnotes_df[['Title','Vol_Number']]
Titles of volumes with intratextual footnotes (in our one-footnote dataset):
| Title | Vol_Number | |
|---|---|---|
| 49 | The life and strange surprising adventures of Robinson Crusoe; of York, mariner: who lived eight... | Volume 2 |
| 54 | Opuscule d'un c<c3><a9>l<c3><a9>bre auteur Egyptien. Contenant l'histoire d'Orph<c3><a9>e, par l... | 0 |
| 89 | L'art de corriger et de rendre les hommes constans. Prix 30 sols. | 0 |
| 113 | The invasion; or, what might have been. A novel. In two volumes. ... | Volume 1 |
| 130 | Delia, a pathetic and interesting tale. In Four Volumes ... | Volume 2 |
| 136 | Les avantures de Gil Blas de Santillane. Par Monsieur Le Sage. Nouvelle edition, avec des figures. | Volume 3 |
| 140 | Letters between an English lady and her friend at Paris. In which are contained, The memoirs of ... | Volume 1 |
| 148 | Burton-Wood. In a series of letters. By a lady. ... | Volume 1 |
| 160 | Orlando and Lavinia: or, the libertine. A novel. In four volumes. By a lady. ... | Volume 4 |
| 163 | He is found at last: or, memoirs of the Beverley family. In two volumes. ... | Volume 2 |
| 176 | The history of Sir Charles Grandison. In a series of letters. By Mr Samuel Richardson, Author of... | Volume 8 |
| 187 | Memoirs of Mary, a novel. By Mrs. Gunning. In five volumes. ... | Volume 2 |
| 218 | The life and opinions of Tristram Shandy, gentleman. In three volumes. ... | Volume 5 |
| 223 | Sentimental memoirs: by a lady. ... | Volume 2 |
| 258 | Memoirs of the life, sufferings, and surprising adventures of a noble foreigner at *******. To w... | 0 |
| 263 | History of Lady Bettesworth and Captain Hastings. In a series of letters. In two volumes. ... | Volume 1 |
| 271 | Radzivil, a romance. Translated from the Russ of the celebrated M. Wocklow. In three volumes. ... | Volume 1 |
| 272 | The history of Sir Charles Grandison; in a series of letters. By Mr. Samuel Richardson, Author o... | Volume 1 |
| 300 | The history of Lord Belford, and Miss Sophia Woodley. In a series of letters. ... | Volume 3 |
| 305 | The example: or the history of Lucy Cleveland. By a young lady. ... | Volume 2 |
| 313 | The self-Tormentor, a novel. In three volumes. ... | Volume 2 |
| 316 | The history of Mademoiselle de Beleau; or, the new Roxana, the fortunate mistress: afterwards Co... | 0 |
| 330 | The history of Tom Jones, a foundling. By Henry Fielding, Esq; In four volumes. ... | Volume 1 |
| 340 | The female American; or, the adventures of Unca Eliza Winkfield. Compiled by herself. In two vol... | Volume 2 |
| 342 | The history of Miss Sommervile. Written by a lady. In two volumes. ... | Volume 1 |
| 344 | The history of Cleanthes, an Englishman of the highest Quality, and Celemene, the Illustrious Am... | Volume 1 |
| 345 | The history of Tom Jones, a foundling. By Henry Fielding, Esq; ... | Volume 2 |
| 361 | The history of Joseph Andrews, and his friend Mr. Abraham Adams. Written in Imitation of the Man... | Volume 2 |
| 364 | The fine lady a novel by the author of Miss Melmoth. In two volumes. ... | Volume 2 |
| 374 | The ladies advocate: or, wit and beauty a match for treachery and inconstancy. Containing a seri... | 0 |
| 389 | The parasite. ... | Volume 2 |
| 401 | The false friend: a domestic story. By Mary Robinson, Author of Poems, Walsingham, Angelina, Hub... | Volume 4 |
| 402 | The adventures of Mr. Loveill, interspers'd with many real amours of the modern polite world. ... | Volume 2 |
| 440 | The life and adventures of Robinson Crusoe, of York, mariner: who lived eight and twenty years a... | Volume 2 |
| 447 | Theopha; or, memoirs of a Greek slave; as related by her lover, envoy from the court of France t... | Volume 2 |
| 448 | The adventure of a bale of goods from America, in consequence of the Stamp Act. | 0 |
| 451 | Authentic and interesting memoirs of Miss Ann Sheldon; (now Mrs. Archer:) A Lady who figured, du... | Volume 1 |
| 456 | The history of Lady Sophia Sternheim. Attempted from the German of Mr. Wieland..... | Volume 2 |
| 469 | The history of Sir Charles Grandison. In a series of letters. By Mr. Samuel Richardson. In seven... | Volume 1 |
| 471 | The history of Sir Charles Grandison. In a series of letters. By Mr. Samuel Richardson. In seven... | Volume 7 |
| 472 | The history of the adventures of Joseph Andrews, and of his friend Mr. Abraham Adams. Written in... | Volume 2 |
| 476 | A journey from this world to the next, &c. &c. by Henry Fielding, Esq. | 0 |
| 490 | L'espion chinois: ou, L'Envoy<c3><a9> secret de la cour de Pekin, pour examiner L'<c3><a9>tat Pr... | Volume 2 |
| 491 | The will of a certain northern vicar. The second edition, with corrections. To which is annex'd ... | 0 |
| 522 | Elisa Powell, or trials of sensibility: a series of original letters, collected by a Welsh curat... | Volume 2 |
| 528 | The hermit: or, the unparallel'd sufferings and surprising adventures of Mr. Philip Quarll, an E... | 0 |
| 535 | The adventures of David Simple: containing an account of his travels through the cities of Londo... | Volume 1 |
| 566 | Letters from Yorick to Eliza and Sterne's letters to his friends on various occasions. To which ... | Volume 2 |
| 567 | Evelina; or, a young lady's entrance into the world. In two volumes. ... | Volume 2 |
| 580 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 3 |
| 585 | Letters of the Right Honourable Lady My W---y M----e: written, during her travels in Europe, Asi... | Volume 1 |
| 592 | Henrietta of Gerstenfeld. Translated from the German of Mr. Wieland. | 0 |
| 595 | Herman of Unna: a series of adventures of the fifteenth century, in which the proceedings of the... | Volume 3 |
| 603 | The life and opinions of Tristram Shandy, gentleman. ... | Volume 2 |
| 619 | The citizen of the world: or, letters from a Chinese philosopher, residing in London, to his fri... | Volume 2 |
| 620 | Lettres de Madame la Marquise de Pompadour, depuis MDCCLIII jusqu'<c3><a0> MDCCLXII, inclusiveme... | Volume 3 |
| 621 | Slavery: or, the times. By the author of Monmouth, The Danish massacre, &c. | 0 |
| 626 | Honny soit qui mal y pense, ou histoires des filles c<c3><a9>lebres du XVIIIe siecle. ... | Volume 2 |
| 645 | The epistles of Clio and Strephon, being a collection of letters that passed between an English ... | 0 |
| 684 | The adventurer. ... . On vent'rous wing in quest of praise I go, And leave the gazing multitude ... | Volume 2 |
| 695 | The connoisseur. By Mr. Town, critic and censor-general. ... | Volume 4 |
| 704 | The virtuous orphan; or the life, misfortunes, and adventures, of Indiana. Written by herself. I... | Volume 1 |
| 713 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 2 |
| 719 | A stage-Coach journey to Exeter. Describing the humours on the road, with the characters and adv... | 0 |
| 731 | Julia de Roubign<c3><a9>, a tale. In a series of letters. Published by the author of The man of ... | Volume 2 |
| 758 | Six satires of Horace, in a style between free imitation and literal version. By William Clubbe,... | 0 |
| 792 | The penitent thief: or, or, a narrative Of two women fearing God, who visited in prison a highwa... | 0 |
| 796 | Memoirs of Mary, a novel. By Mrs. Gunning. In five volumes. ... | Volume 1 |
| 797 | Memoirs of Mary, a novel. By Mrs. Gunning. In five volumes. ... | Volume 2 |
| 806 | Alphonse D'Inange, ou le nouveau Grandisson. Seconde Partie. | Volume 4 |
| 820 | Slavery: or, the times. In two volumes. By the author of Monmouth, The Danish massacre, &c. ... | Volume 1 |
| 840 | The man of feeling. | 0 |
| 867 | The ladies complete letter-writer; teaching the art of inditing letters on every subject that ca... | 0 |
| 874 | The female Quixote; or, the adventures of Arabella. By Mrs. Lennox. In two volumes. ... Cooke's ... | Volume 1 |
| 878 | The novelist's magazine. ... | Volume 22 |
| 898 | The history of Fanny Meadows. In a series of letters. By the author of The exemplary mother. ... | Volume 1 |
| 904 | An affecting relation of the appearance of Thomas Ostrehan's apparition, to his friend Robert St... | 0 |
| 909 | Emma Corbett. In two volumes. By Mr. Pratt. ... | Volume 1 |
| 949 | An account of the convincement, exercises, services, and travels, of That Ancient Servant of the... | 0 |
| 953 | The life and opinions of Tristram Shandy, gentleman. Vol. Vii. | Volume 7 |
| 954 | The life and opinions of Tristram Shandy, gentleman. Vol. Vii. | Volume 8 |
| 956 | Le couvent, ou histoire de Sophie Nelson. Traduit de l'anglois. Deuxieme partie. | Volume 1 |
| 987 | The woman of letters; or, the history of Miss Fanny Belton. In two volumes. ... | Volume 2 |
| 988 | The excursion, a novel, by Mrs. Brooke, Author of Julia Mandeville, Emily Montague, &c. In two v... | Volume 1 |
| 998 | A learned dissertation on dumpling; its dignity, antiquity, and excellence. With a word upon pud... | 0 |
| 1006 | The history of Sir Charles Grandison. In a series of letters. By Mr. Samuel Richardson, ... In e... | Volume 4 |
| 1009 | The mistakes of the heart: or, memoirs of Lady Carolina Pelham and Lady Victoria Nevil. In a ser... | Volume 2 |
| 1041 | The English rogue; or, life of Jeremy Sharp: to which is added a narrative of an extradordinary ... | Volume 3 |
| 1048 | A father's legacy to his daughters. By the late Dr. Gregory of Edinburgh. | 0 |
| 1061 | Nouvelles fables, avec une traduction De quelques Sonnets choisis de petrarque. Et une romance. ... | 0 |
| 1070 | Mori<c3><a6> encomium: or, the praise of folly. Made English from the Latin of Erasmus. By W. Ke... | 0 |
| 1071 | The selector: being a new and chaste collection of visions, tales, and allegories, calculated fo... | 0 |
| 1081 | Friendship in death: in twenty letters from the dead to the living. To which are added, letters ... | 0 |
| 1118 | It is, and it is not a novel. In two volumes. By Charlotte Palmer. ... | Volume 2 |
| 1134 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 3 |
| 1135 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 5 |
| 1146 | The works of Dr. Jonathan Swift, Dean of St. Patrick's, Dublin. Volume XV. Collected and Revised... | Volume 15 |
| 1147 | The expedition of Humphry Clinker. By the author of Roderick Random. In two volumes. ... | Volume 2 |
| 1150 | Paradise lost. A poem, in twelve books. The author John Milton. A new edition. To which is added... | 0 |
| 1158 | The history of the adventures of Joseph Andrews, and his Friend Mr Abraham Adams. Written in imi... | Volume 1 |
| 1178 | The modern fanatick. Part II. Containing what is necessary to clear all the matters of fact in t... | 0 |
| 1196 | Cheap Repository. The two wealthy farmers; or, the history of Mr. Bragwell. In seven parts. | 0 |
| 1207 | Poems by the Rev. Josiah Relph, of Sebergham. With the life of the author. Third edition, with i... | 0 |
| 1217 | Henrietta, Countess Osenvor, a sentimental novel, in a series of letters to Lady Susannah Fitzro... | Volume 1 |
| 1255 | The ring, a novel: In a Series of Letters. By a Young Lady. In Three Volumes. ... | Volume 2 |
| 1273 | The moral miscellany: or, a collection of select pieces, in prose and verse. For the instruction... | 0 |
| 1285 | Arundel. By the author of The observer. ... | Volume 2 |
| 1302 | The history of John Juniper, Esq. alias Juniper Jack. Containing the birth, parentage, and educa... | Volume 2 |
| 1317 | Julia de Roubign<c3><a9>, a tale. In a series of letters. Published by the author of The man of ... | Volume 2 |
| 1338 | The sylph; a novel. | 0 |
| 1395 | The history of Mademoiselle de Beleau; or, the new Roxana, the fortunate mistress: afterwards Co... | 0 |
| 1406 | Frederica: or the memoirs of a young lady. A novel, in two volumes. By a lady. Dedicated to Her ... | Volume 2 |
| 1413 | The history of Miss Melmoth. In two volumes. By the author of The fine lady. ... | Volume 1 |
| 1426 | Emma Corbett. In two volumes. ... | Volume 2 |
| 1438 | Letters of the late Lord Lyttleton. In two volumes. ... | Volume 1 |
| 1449 | The adventurer. ... | Volume 1 |
intratextual_footnotes_df['Filename'].to_csv('../intratextual_footnotes_filenames.csv')
genre_term = "memoir|Memoir"
dataframe = intratextual_footnotes_df
corpus_name = "Intratextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_intratextual_memoirs = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'memoir|Memoir': 11 Percent of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'memoir|Memoir': 0.09482758620689655
genre_term = "letter|Letter|lettre|Lettre"
dataframe = intratextual_footnotes_df
corpus_name = "Intratextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_intratextual_letters = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'letter|Letter|lettre|Lettre': 28 Percent of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'letter|Letter|lettre|Lettre': 0.2413793103448276
genre_term = "novel|Novel|roman|Roman"
dataframe = intratextual_footnotes_df
corpus_name = "Intratextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'novel|Novel|roman|Roman': 17 Percent of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'novel|Novel|roman|Roman': 0.14655172413793102
genre_term = "tale|Tale|recit|Recit"
dataframe = intratextual_footnotes_df
corpus_name = "Intratextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_intratextual_tale = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'tale|Tale|recit|Recit': 4 Percent of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'tale|Tale|recit|Recit': 0.034482758620689655
genre_term = "romance|Romance"
dataframe = intratextual_footnotes_df
corpus_name = "Intratextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'romance|Romance': 3 Percent of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'romance|Romance': 0.02586206896551724
genre_term = "history|History|histories|Histories|histoire|Histoire"
dataframe = intratextual_footnotes_df
corpus_name = "Intratextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'history|History|histories|Histories|histoire|Histoire': 27 Percent of volumes in the Intratextual Footnotes (One-Footnote Corpus) that contain the words 'history|History|histories|Histories|histoire|Histoire': 0.23275862068965517
# Proportions of Memoirs, Letters, Tales
proportions_intratextual = [proportion_intratextual_memoirs, proportion_intratextual_letters, proportion_intratextual_tale]
proportions_intratextual
[0.09482758620689655, 0.2413793103448276, 0.034482758620689655]
# Let's look at just the MT tags
metatextual_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'MT']
# Plot the nubmer of MT tagged footnotes in each year
ax = metatextual_footnotes_df.groupby('Date')[['footnotes_present']].sum().plot(kind="bar", figsize=(10, 5), title="Metatextual footnotes in our one-footnote dataset, by year")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Date")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'MT'"])
<matplotlib.legend.Legend at 0x145379290>
# Plot the number of MT tagged footnotes in each decade
ax = metatextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().plot(kind="bar", figsize=(10, 5), title="Metatextual footnotes in our one-footnote dataset, by decade")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'MT'"])
<matplotlib.legend.Legend at 0x144be7290>
# Redo so scale matches contextual
ax = metatextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().plot(kind="bar", figsize=(10, 5), title="Metatextual footnotes in our one-footnote dataset, by decade")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
ax.set_ylim(0,100)
plt.legend(["Volumes with tag 'MT'"])
<matplotlib.legend.Legend at 0x144f185d0>
# Create a dataframe of all volumes (including those misclassified in one-footnote-dataset)
all_footnotes_by_decade = one_footnote_tagged_new_df.groupby('decade')[['tag']].count().reset_index()
#not_metatextual_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] != 'MT']
#not_metatextual_footnotes_by_decade = not_metatextual_footnotes_df.groupby('decade')[['tag']].count().reset_index()
metatextual_footnotes_by_decade = metatextual_footnotes_df.groupby('decade')[['footnotes_present']].sum().reset_index()
# Merge the two dataframes
merged_MT = pd.merge_ordered(all_footnotes_by_decade,metatextual_footnotes_by_decade, on='decade').fillna(0)
merged_MT['ratio'] = merged_MT['footnotes_present'] / merged_MT['tag']
merged_MT
| decade | tag | footnotes_present | ratio | |
|---|---|---|---|---|
| 0 | 1690 | 1 | 0.0 | 0.000000 |
| 1 | 1700 | 20 | 0.0 | 0.000000 |
| 2 | 1710 | 36 | 0.0 | 0.000000 |
| 3 | 1720 | 54 | 4.0 | 0.074074 |
| 4 | 1730 | 44 | 1.0 | 0.022727 |
| 5 | 1740 | 75 | 0.0 | 0.000000 |
| 6 | 1750 | 121 | 4.0 | 0.033058 |
| 7 | 1760 | 184 | 9.0 | 0.048913 |
| 8 | 1770 | 236 | 17.0 | 0.072034 |
| 9 | 1780 | 255 | 8.0 | 0.031373 |
| 10 | 1790 | 396 | 10.0 | 0.025253 |
| 11 | 1800 | 62 | 2.0 | 0.032258 |
# Create a dataframe of all volumes (including those misclassified in one-footnote-dataset)
all_footnotes_by_date = one_footnote_tagged_new_df.groupby('Date')[['tag']].count().reset_index()
#not_metatextual_footnotes_df = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] != 'MT']
#not_metatextual_footnotes_by_decade = not_metatextual_footnotes_df.groupby('decade')[['tag']].count().reset_index()
metatextual_footnotes_by_date = metatextual_footnotes_df.groupby('Date')[['footnotes_present']].sum().reset_index()
# Merge the two dataframes
merged_MT_date = pd.merge_ordered(all_footnotes_by_date,metatextual_footnotes_by_date, on='Date').fillna(0)
merged_MT_date['ratio'] = merged_MT_date['footnotes_present'] / merged_MT_date['tag']
merged_MT_date
| Date | tag | footnotes_present | ratio | |
|---|---|---|---|---|
| 0 | 1697 | 1 | 0.0 | 0.000000 |
| 1 | 1701 | 2 | 0.0 | 0.000000 |
| 2 | 1702 | 3 | 0.0 | 0.000000 |
| 3 | 1703 | 2 | 0.0 | 0.000000 |
| 4 | 1704 | 1 | 0.0 | 0.000000 |
| 5 | 1705 | 5 | 0.0 | 0.000000 |
| 6 | 1706 | 2 | 0.0 | 0.000000 |
| 7 | 1707 | 2 | 0.0 | 0.000000 |
| 8 | 1708 | 1 | 0.0 | 0.000000 |
| 9 | 1709 | 2 | 0.0 | 0.000000 |
| 10 | 1710 | 8 | 0.0 | 0.000000 |
| 11 | 1711 | 3 | 0.0 | 0.000000 |
| 12 | 1712 | 3 | 0.0 | 0.000000 |
| 13 | 1713 | 2 | 0.0 | 0.000000 |
| 14 | 1715 | 6 | 0.0 | 0.000000 |
| 15 | 1716 | 3 | 0.0 | 0.000000 |
| 16 | 1717 | 2 | 0.0 | 0.000000 |
| 17 | 1718 | 3 | 0.0 | 0.000000 |
| 18 | 1719 | 6 | 0.0 | 0.000000 |
| 19 | 1720 | 6 | 0.0 | 0.000000 |
| 20 | 1721 | 5 | 0.0 | 0.000000 |
| 21 | 1722 | 3 | 0.0 | 0.000000 |
| 22 | 1723 | 4 | 0.0 | 0.000000 |
| 23 | 1724 | 4 | 0.0 | 0.000000 |
| 24 | 1725 | 8 | 1.0 | 0.125000 |
| 25 | 1726 | 7 | 2.0 | 0.285714 |
| 26 | 1727 | 7 | 1.0 | 0.142857 |
| 27 | 1728 | 7 | 0.0 | 0.000000 |
| 28 | 1729 | 3 | 0.0 | 0.000000 |
| 29 | 1730 | 3 | 1.0 | 0.333333 |
| 30 | 1731 | 4 | 0.0 | 0.000000 |
| 31 | 1732 | 5 | 0.0 | 0.000000 |
| 32 | 1733 | 4 | 0.0 | 0.000000 |
| 33 | 1734 | 3 | 0.0 | 0.000000 |
| 34 | 1735 | 8 | 0.0 | 0.000000 |
| 35 | 1736 | 6 | 0.0 | 0.000000 |
| 36 | 1737 | 7 | 0.0 | 0.000000 |
| 37 | 1738 | 2 | 0.0 | 0.000000 |
| 38 | 1739 | 2 | 0.0 | 0.000000 |
| 39 | 1740 | 6 | 0.0 | 0.000000 |
| 40 | 1741 | 10 | 0.0 | 0.000000 |
| 41 | 1742 | 12 | 0.0 | 0.000000 |
| 42 | 1743 | 8 | 0.0 | 0.000000 |
| 43 | 1744 | 7 | 0.0 | 0.000000 |
| 44 | 1745 | 2 | 0.0 | 0.000000 |
| 45 | 1746 | 8 | 0.0 | 0.000000 |
| 46 | 1747 | 5 | 0.0 | 0.000000 |
| 47 | 1748 | 5 | 0.0 | 0.000000 |
| 48 | 1749 | 12 | 0.0 | 0.000000 |
| 49 | 1750 | 12 | 1.0 | 0.083333 |
| 50 | 1751 | 13 | 2.0 | 0.153846 |
| 51 | 1752 | 16 | 0.0 | 0.000000 |
| 52 | 1753 | 12 | 0.0 | 0.000000 |
| 53 | 1754 | 13 | 0.0 | 0.000000 |
| 54 | 1755 | 15 | 0.0 | 0.000000 |
| 55 | 1756 | 10 | 0.0 | 0.000000 |
| 56 | 1757 | 12 | 0.0 | 0.000000 |
| 57 | 1758 | 7 | 0.0 | 0.000000 |
| 58 | 1759 | 11 | 1.0 | 0.090909 |
| 59 | 1760 | 17 | 3.0 | 0.176471 |
| 60 | 1761 | 19 | 1.0 | 0.052632 |
| 61 | 1762 | 11 | 0.0 | 0.000000 |
| 62 | 1763 | 10 | 1.0 | 0.100000 |
| 63 | 1764 | 10 | 0.0 | 0.000000 |
| 64 | 1765 | 17 | 1.0 | 0.058824 |
| 65 | 1766 | 22 | 0.0 | 0.000000 |
| 66 | 1767 | 30 | 2.0 | 0.066667 |
| 67 | 1768 | 20 | 0.0 | 0.000000 |
| 68 | 1769 | 28 | 1.0 | 0.035714 |
| 69 | 1770 | 25 | 2.0 | 0.080000 |
| 70 | 1771 | 19 | 1.0 | 0.052632 |
| 71 | 1772 | 27 | 2.0 | 0.074074 |
| 72 | 1773 | 29 | 4.0 | 0.137931 |
| 73 | 1774 | 28 | 1.0 | 0.035714 |
| 74 | 1775 | 29 | 2.0 | 0.068966 |
| 75 | 1776 | 26 | 1.0 | 0.038462 |
| 76 | 1777 | 10 | 0.0 | 0.000000 |
| 77 | 1778 | 20 | 0.0 | 0.000000 |
| 78 | 1779 | 23 | 4.0 | 0.173913 |
| 79 | 1780 | 23 | 1.0 | 0.043478 |
| 80 | 1781 | 13 | 0.0 | 0.000000 |
| 81 | 1782 | 17 | 0.0 | 0.000000 |
| 82 | 1783 | 35 | 3.0 | 0.085714 |
| 83 | 1784 | 27 | 0.0 | 0.000000 |
| 84 | 1785 | 26 | 0.0 | 0.000000 |
| 85 | 1786 | 25 | 0.0 | 0.000000 |
| 86 | 1787 | 23 | 2.0 | 0.086957 |
| 87 | 1788 | 34 | 0.0 | 0.000000 |
| 88 | 1789 | 32 | 2.0 | 0.062500 |
| 89 | 1790 | 37 | 2.0 | 0.054054 |
| 90 | 1791 | 37 | 0.0 | 0.000000 |
| 91 | 1792 | 24 | 0.0 | 0.000000 |
| 92 | 1793 | 44 | 0.0 | 0.000000 |
| 93 | 1794 | 38 | 2.0 | 0.052632 |
| 94 | 1795 | 42 | 4.0 | 0.095238 |
| 95 | 1796 | 51 | 1.0 | 0.019608 |
| 96 | 1797 | 41 | 0.0 | 0.000000 |
| 97 | 1798 | 35 | 0.0 | 0.000000 |
| 98 | 1799 | 47 | 1.0 | 0.021277 |
| 99 | 1800 | 62 | 2.0 | 0.032258 |
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_MT['tag'], merged_MT['footnotes_present'])
#print Pearson rank correlation and p-value
print("Correlation between NUMBER OF VOLUMES and NUMBER OF METATEXTUAL FOOTNOTES (by decade)")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between NUMBER OF VOLUMES and NUMBER OF METATEXTUAL FOOTNOTES (by decade) Pearson rank correlation: 0.806469010075845 p-value: 0.0015294267255524278
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_MT_date['tag'], merged_MT_date['footnotes_present'])
#print Pearson rank correlation and p-value
print("Correlation between NUMBER OF VOLUMES and NUMBER OF METATEXTUAL FOOTNOTES (by year)")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between NUMBER OF VOLUMES and NUMBER OF METATEXTUAL FOOTNOTES (by year) Pearson rank correlation: 0.5190495538264853 p-value: 3.154055022951281e-08
# Let's perform Pearson's test for correlation
# We assume the null hypthoesis: there is no association between the variables
#calculate Pearson correlation and corresponding p-value
rho, p = pearsonr(merged_MT['decade'], merged_MT['ratio'])
#print Pearson rank correlation and p-value
print("Correlation between DECADE and PROPORTION OF METATEXTUAL FOOTNOTES")
print("Pearson rank correlation:")
print(rho)
print("p-value:")
print(p)
Correlation between DECADE and PROPORTION OF METATEXTUAL FOOTNOTES Pearson rank correlation: 0.42965495072839 p-value: 0.16332590458793206
NOT STATISTICALLY SIGNIFICANT
While Pearson rank correlation of 0.42965495072839 indicates a weak positive correlation, the p-value of 0.16332590458793206 is not under the threshold of 0.05, indicating this is not statisticatlly significant.
Here, the null test has failed to be rejected. This does not mean that metatextual footnotes necessarily keep pace with the number of volumes per year. See below for our piecewise analysis of the trend.
# Figure out what proportion of the larger population our subset is
proportion = merged_MT['tag'].sum() / merged_MT['footnotes_present'].sum()
proportion
26.98181818181818
# Let's run a chi2 goodness of fit test
#stats.chisquare(f_obs=merged_MT['tag_y'], f_exp=merged_MT['tag_x']/proportion)
xtrain = merged_MT['decade']
ytrain = merged_MT['ratio']
model = sm.Logit(ytrain, xtrain).fit()
model.summary()
Optimization terminated successfully.
Current function value: 0.035706
Iterations 7
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/statsmodels/discrete/discrete_model.py:4465: RuntimeWarning: divide by zero encountered in scalar divide return 1 - self.llf/self.llnull
| Dep. Variable: | ratio | No. Observations: | 12 |
|---|---|---|---|
| Model: | Logit | Df Residuals: | 11 |
| Method: | MLE | Df Model: | 0 |
| Date: | Tue, 18 Mar 2025 | Pseudo R-squ.: | inf |
| Time: | 17:13:46 | Log-Likelihood: | -0.42847 |
| converged: | True | LL-Null: | 0.0000 |
| Covariance Type: | nonrobust | LLR p-value: | nan |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| decade | -0.0020 | 0.001 | -2.034 | 0.042 | -0.004 | -7.35e-05 |
fig, ax = plt.subplots(1,1, figsize = (8,6))
label = merged_MT["decade"]
x = np.arange(len(label))
rect2 = ax.bar(x,
merged_MT["tag"],
label = "All volumes",
edgecolor = "black")
rect2 = ax.bar(x,
merged_MT["footnotes_present"],
label = "Metatextual footnotes in volumes",
edgecolor = "black")
#add the labels to the axis
ax.set_ylabel("Number of volumes",
fontsize = 15,
labelpad = 20)
ax.set_xlabel("Decade",
fontsize = 15,
labelpad =20)
ax.set_title("Metatextual footnotes in one-footnote dataset vs all volumes in one-footnote dataset, by decade",
fontsize = 15,
pad = 20)
#set the ticks
ax.set_xticks(x)
ax.set_xticklabels(label)#add the legend
#using the labels of the bars
ax.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax.tick_params(axis = "x",
which = "both",
labelrotation = 90)
ax.tick_params(axis = "y",
which = "both",
labelsize = 10)
fig, (ax1, ax2) = plt.subplots(2, 1, gridspec_kw={'height_ratios': [3, 1]}, figsize = (8,8))
label = merged_MT["decade"]
x = np.arange(len(label))
rect2 = ax1.bar(x,
merged_MT["tag"],
label = "All volumes",
edgecolor = "black")
rect2 = ax1.bar(x,
merged_MT["footnotes_present"],
label = "Metatextual footnotes in volumes",
edgecolor = "black")
rect3 = ax2.plot(x,
merged_MT["footnotes_present"]/merged_MT["tag"],
label = "Proportion of Metatextual Footnotes",
marker="o",
color="black",
markeredgecolor="black")
#add the labels to the axis
ax1.set_ylabel("Number of volumes",
fontsize = 15,
labelpad = 20)
ax2.set_xlabel("Decade",
fontsize = 15,
labelpad =20)
ax1.set_title("Metatextual footnotes in one-footnote dataset vs all volumes in one-footnote dataset, by decade",
fontsize = 15,
pad = 10)
#set the ticks
ax1.set_xticks(x)
ax1.set_xticklabels(label)#add the legend
#using the labels of the bars
ax1.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax1.tick_params(axis = "x",
which = "both",
labelrotation = 0)
ax1.tick_params(axis = "y",
which = "both",
labelsize = 10)
###
#add the labels to the axis
#set the ticks
ax2.set_xticks(x)
ax2.set_xticklabels(label)#add the legend
ax2.set_ylim(0,0.4)
#using the labels of the bars
ax2.legend(#title = "Datasets",
fontsize = 12,
title_fontsize = 15)#adjust the tick paramaters
ax2.tick_params(axis = "x",
which = "both",
labelrotation = 0)
# Fitting a Linear Regression model to our data (binning by decade)
linear = LinearRegression()
linear.fit(merged_MT[['decade']], merged_MT['ratio'])
# Creating a scatterplot of our data (in blue)
ax = merged_MT.plot.scatter(x='decade', y='ratio')
# Adding our plotted linear regression model (in orange)
ax.plot(merged_MT['decade'], linear.predict(merged_MT[['decade']]), c='orange')
ax.set_ylim(0, 0.4)
linear.score(merged_MT[['decade']], merged_MT['ratio'])
0.18460337668541582
#define response variable
y = merged_MT['ratio']
#define predictor variables
x = merged_MT[['decade']]
#add constant to predictor variables
x = sm.add_constant(x)
#fit linear regression model
model = sm.OLS(y, x).fit()
#view model summary
print(model.summary())
OLS Regression Results
==============================================================================
Dep. Variable: ratio R-squared: 0.185
Model: OLS Adj. R-squared: 0.103
Method: Least Squares F-statistic: 2.264
Date: Sun, 16 Mar 2025 Prob (F-statistic): 0.163
Time: 14:22:41 Log-Likelihood: 28.287
No. Observations: 12 AIC: -52.57
Df Residuals: 10 BIC: -51.60
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
const -0.5227 0.366 -1.427 0.184 -1.339 0.293
decade 0.0003 0.000 1.505 0.163 -0.000 0.001
==============================================================================
Omnibus: 5.349 Durbin-Watson: 1.931
Prob(Omnibus): 0.069 Jarque-Bera (JB): 2.749
Skew: 1.165 Prob(JB): 0.253
Kurtosis: 3.261 Cond. No. 8.82e+04
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.82e+04. This might indicate that there are
strong multicollinearity or other numerical problems.
/Users/sierraeckert/anaconda3/lib/python3.11/site-packages/scipy/stats/_stats_py.py:1736: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=12
warnings.warn("kurtosistest only valid for n>=20 ... continuing "
#o = rb.beast( merged_MT['ratio'], start=1690, deltat= 10, season='none') # season='none' bcz the data has no seasonal/periodic component
#rb.plot(o, title='Proportion of metatextual footnotes per decade')
#rb.print(o)
#o = rb.beast( merged_MT_date['ratio'], start=1690, season='none', period=1.0) # season='none' bcz the data has no seasonal/periodic component
#rb.plot(o, title='Proportion of metatextual footnotes per year')
#rb.print(o)
x= list(merged_MT['decade'])
y= list(merged_MT['ratio'])
pw_fit = piecewise_regression.Fit(x, y, n_breakpoints=2)
pw_fit.summary()
Breakpoint Regression Results
====================================================================================================
No. Observations 12
No. Model Parameters 6
Degrees of Freedom 6
Res. Sum of Squares 0.00474362
Total Sum of Squares 0.00772446
R Squared 0.385897
Adjusted R Squared -0.351027
Converged: True
====================================================================================================
====================================================================================================
Estimate Std Err t P>|t| [0.025 0.975]
----------------------------------------------------------------------------------------------------
const -0.68092 1.15 -0.59068 0.576 -3.5017 2.1398
alpha1 0.000406445 0.000672 0.6047 0.568 -0.0012382 0.0020511
beta1 0.00117907 0.00403 0.29237 - -0.0086889 0.011047
beta2 -0.00283999 0.00417 -0.68097 - -0.013045 0.0073649
breakpoint1 1747.71 36.5 - - 1658.4 1837.0
breakpoint2 1767.99 21.5 - - 1715.5 1820.5
----------------------------------------------------------------------------------------------------
These alphas(gradients of segments) are estimatedfrom betas(change in gradient)
----------------------------------------------------------------------------------------------------
alpha2 0.00158552 0.00398 0.39873 0.704 -0.0081445 0.011316
alpha3 -0.00125448 0.00126 -0.99763 0.357 -0.0043314 0.0018224
====================================================================================================
Davies test for existence of at least 1 breakpoint: p=0.458607 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)
'\n Breakpoint Regression Results \n====================================================================================================\nNo. Observations 12\nNo. Model Parameters 6\nDegrees of Freedom 6\nRes. Sum of Squares 0.00474362\nTotal Sum of Squares 0.00772446\nR Squared 0.385897\nAdjusted R Squared -0.351027\nConverged: True\n====================================================================================================\n====================================================================================================\n Estimate Std Err t P>|t| [0.025 0.975]\n----------------------------------------------------------------------------------------------------\nconst -0.68092 1.15 -0.59068 0.576 -3.5017 2.1398\nalpha1 0.000406445 0.000672 0.6047 0.568 -0.0012382 0.0020511\nbeta1 0.00117907 0.00403 0.29237 - -0.0086889 0.011047\nbeta2 -0.00283999 0.00417 -0.68097 - -0.013045 0.0073649\nbreakpoint1 1747.71 36.5 - - 1658.4 1837.0\nbreakpoint2 1767.99 21.5 - - 1715.5 1820.5\n----------------------------------------------------------------------------------------------------\nThese alphas(gradients of segments) are estimatedfrom betas(change in gradient)\n----------------------------------------------------------------------------------------------------\nalpha2 0.00158552 0.00398 0.39873 0.704 -0.0081445 0.011316\nalpha3 -0.00125448 0.00126 -0.99763 0.357 -0.0043314 0.0018224\n====================================================================================================\nDavies test for existence of at least 1 breakpoint: p=0.458607 (e.g. p<0.05 means reject null hypothesis of no breakpoints at 5% significance)\n\n'
# Plot the data, fit, breakpoints and confidence intervals
pw_fit.plot_data(color="grey", s=20)
# Pass in standard matplotlib keywords to control any of the plots
pw_fit.plot_fit(color="red", linewidth=2)
pw_fit.plot_breakpoints()
pw_fit.plot_breakpoint_confidence_intervals()
plt.xlabel("Decade")
plt.ylim(0, 0.4)
plt.ylabel("Proportion of volumes with footnotes")
plt.title("Proportion of Metatextual Footnotes: Trend Model")
plt.show()
plt.close()
pd.options.display.max_colwidth = 100
print('Number of volumes with metatexual footnotes in our one-footnote dataset:')
metatextual_footnotes_df['Title'].count()
Number of volumes with metatexual footnotes in our one-footnote dataset:
55
pd.set_option('display.max_rows', 200)
print('Titles of volumes with metatexual footnotes (in our one-footnote dataset):')
metatextual_footnotes_df[['Title', 'Vol_Number']]
Titles of volumes with metatexual footnotes (in our one-footnote dataset):
| Title | Vol_Number | |
|---|---|---|
| 30 | The farmer's son of Kent. A tale. In two volumes. ... | Volume 2 |
| 57 | The life of the Countess of G. By Gellert. Translated from the German, by a lady. ... | Volume 1 |
| 89 | L'art de corriger et de rendre les hommes constans. Prix 30 sols. | 0 |
| 130 | Delia, a pathetic and interesting tale. In Four Volumes ... | Volume 2 |
| 140 | Letters between an English lady and her friend at Paris. In which are contained, The memoirs of ... | Volume 1 |
| 142 | The history of a woman of quality: or, the adventures of Lady Frail. By an impartial hand. | 0 |
| 163 | He is found at last: or, memoirs of the Beverley family. In two volumes. ... | Volume 2 |
| 218 | The life and opinions of Tristram Shandy, gentleman. In three volumes. ... | Volume 5 |
| 263 | History of Lady Bettesworth and Captain Hastings. In a series of letters. In two volumes. ... | Volume 1 |
| 271 | Radzivil, a romance. Translated from the Russ of the celebrated M. Wocklow. In three volumes. ... | Volume 1 |
| 272 | The history of Sir Charles Grandison; in a series of letters. By Mr. Samuel Richardson, Author o... | Volume 1 |
| 273 | The life and opinions of Tristram Shandy, gentleman. With The Life of the Author. ... | Volume 2 |
| 274 | The life and opinions of Tristram Shandy, gentleman. With The Life of the Author. ... | Volume 3 |
| 275 | The life and opinions of Tristram Shandy, gentleman. With The Life of the Author. ... | Volume 5 |
| 276 | The life and opinions of Tristram Shandy, gentleman. With The Life of the Author. ... | Volume 8 |
| 302 | The story of the Methodist-Lady: or the injur'd husband's revenge. A true history. | 0 |
| 313 | The self-Tormentor, a novel. In three volumes. ... | Volume 2 |
| 337 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 2 |
| 340 | The female American; or, the adventures of Unca Eliza Winkfield. Compiled by herself. In two vol... | Volume 2 |
| 361 | The history of Joseph Andrews, and his friend Mr. Abraham Adams. Written in Imitation of the Man... | Volume 2 |
| 402 | The adventures of Mr. Loveill, interspers'd with many real amours of the modern polite world. ... | Volume 2 |
| 414 | Virtue made easy; or, a tablet of morality: being a collection of maxims and moral sayings. | 0 |
| 451 | Authentic and interesting memoirs of Miss Ann Sheldon; (now Mrs. Archer:) A Lady who figured, du... | Volume 1 |
| 469 | The history of Sir Charles Grandison. In a series of letters. By Mr. Samuel Richardson. In seven... | Volume 1 |
| 481 | The citizen of the world, or letters from a Chinese philosopher, residing in London, to his frie... | Volume 2 |
| 491 | The will of a certain northern vicar. The second edition, with corrections. To which is annex'd ... | 0 |
| 494 | A learned dissertation on dumpling: its dignity, antiquity, and excellence. With a word upon pud... | 0 |
| 503 | A learned dissertation on dumpling; its dignity, antiquity, and excellence. With a word upon pud... | 0 |
| 522 | Elisa Powell, or trials of sensibility: a series of original letters, collected by a Welsh curat... | Volume 2 |
| 528 | The hermit: or, the unparallel'd sufferings and surprising adventures of Mr. Philip Quarll, an E... | 0 |
| 542 | The hermit: or, the unparallel'd sufferings and surprising adventures of Mr. Philip Quarll, an E... | 0 |
| 580 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 3 |
| 585 | Letters of the Right Honourable Lady My W---y M----e: written, during her travels in Europe, Asi... | Volume 1 |
| 603 | The life and opinions of Tristram Shandy, gentleman. ... | Volume 2 |
| 619 | The citizen of the world: or, letters from a Chinese philosopher, residing in London, to his fri... | Volume 2 |
| 626 | Honny soit qui mal y pense, ou histoires des filles c<c3><a9>lebres du XVIIIe siecle. ... | Volume 2 |
| 649 | The intelligencer. By the author of a Tale of a tub. | 0 |
| 713 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 2 |
| 719 | A stage-Coach journey to Exeter. Describing the humours on the road, with the characters and adv... | 0 |
| 747 | The wonderful story teller or new pocket library of agreeable entertainment. Consisting entirely... | 0 |
| 784 | Makarony fables; Fables for grown gentlemen; Lyrick epistles; and several other poems; by the au... | 0 |
| 792 | The penitent thief: or, or, a narrative Of two women fearing God, who visited in prison a highwa... | 0 |
| 801 | Chrysal: or, the adventures of a guinea. Wherein are exhibited views of several striking scenes,... | Volume 3 |
| 806 | Alphonse D'Inange, ou le nouveau Grandisson. Seconde Partie. | Volume 4 |
| 840 | The man of feeling. | 0 |
| 904 | An affecting relation of the appearance of Thomas Ostrehan's apparition, to his friend Robert St... | 0 |
| 905 | The history of Sir Charles Grandison. In a series of letters. By Mr. Samuel Richardson, Author o... | Volume 1 |
| 917 | The life and opinions of Tristram Shandy, gentleman. ... | Volume 2 |
| 919 | The female pilgrim; or, the travels of Hephzibah. Under the similitude of a dream. In which is g... | 0 |
| 956 | Le couvent, ou histoire de Sophie Nelson. Traduit de l'anglois. Deuxieme partie. | Volume 1 |
| 987 | The woman of letters; or, the history of Miss Fanny Belton. In two volumes. ... | Volume 2 |
| 998 | A learned dissertation on dumpling; its dignity, antiquity, and excellence. With a word upon pud... | 0 |
| 1133 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 2 |
| 1134 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 3 |
| 1135 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 5 |
metatextual_footnotes_df['Filename'].to_csv('../metatextual_footnotes_filenames.csv')
metatextual_footnotes_df[['Title', 'Field_Headings']].to_csv('../metatextual_footnotes_titles.csv')
genre_term = "memoir|Memoir|memoirs"
dataframe = metatextual_footnotes_df
corpus_name = "Metatextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_metatextual_memoirs = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'memoir|Memoir|memoirs': 4 Percent of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'memoir|Memoir|memoirs': 0.07272727272727272
genre_term = "letter|Letter|lettre|Lettre"
dataframe = metatextual_footnotes_df
corpus_name = "Metatextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_metatextual_letters = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'letter|Letter|lettre|Lettre': 13 Percent of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'letter|Letter|lettre|Lettre': 0.23636363636363636
genre_term = "novel|Novel|roman|Roman"
dataframe = metatextual_footnotes_df
corpus_name = "Metatextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'novel|Novel|roman|Roman': 2 Percent of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'novel|Novel|roman|Roman': 0.03636363636363636
genre_term = "tale|Tale|recit|Recit"
dataframe = metatextual_footnotes_df
corpus_name = "Metatextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
proportion_metatextual_tale = dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count()
Number of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'tale|Tale|recit|Recit': 6 Percent of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'tale|Tale|recit|Recit': 0.10909090909090909
genre_term = "romance|Romance"
dataframe = metatextual_footnotes_df
corpus_name = "Metatextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'romance|Romance': 1 Percent of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'romance|Romance': 0.01818181818181818
genre_term = "history|History|histories|Histories|histoire|Histoire"
dataframe = metatextual_footnotes_df
corpus_name = "Metatextual Footnotes (One-Footnote Corpus)"
print(f"Number of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count())
print(f"Percent of volumes in the {corpus_name} that contain the words '{genre_term}':")
print(dataframe[dataframe["Title"].str.contains(f"{genre_term}")]['Title'].count()/dataframe['DocumentID'].count())
Number of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'history|History|histories|Histories|histoire|Histoire': 12 Percent of volumes in the Metatextual Footnotes (One-Footnote Corpus) that contain the words 'history|History|histories|Histories|histoire|Histoire': 0.21818181818181817
# Proportions of Memoirs, Letters, Tales
proportions_metatextual = [proportion_metatextual_memoirs, proportion_metatextual_letters, proportion_metatextual_tale]
proportions_metatextual
[0.07272727272727272, 0.23636363636363636, 0.10909090909090909]
Memoirs, Letters, Tales
genre_proportions = pd.DataFrame(
{'ECCO': proportions_ECCO,
'One-Footnote': proportions_one_fn,
'One-Footnote (with footnotes)': proportions_one_fn_footnoted,
'Contextual': proportions_contextual,
'Translation': proportions_translation,
'Intertextual': proportions_intertextual,
'Intratextual': proportions_intratextual,
'Metatextual': proportions_metatextual
})
genre_proportions
| ECCO | One-Footnote | One-Footnote (with footnotes) | Contextual | Translation | Intertextual | Intratextual | Metatextual | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0.055891 | 0.065364 | 0.048544 | 0.032698 | 0.036697 | 0.043243 | 0.094828 | 0.072727 |
| 1 | 0.132931 | 0.125337 | 0.135922 | 0.092643 | 0.100917 | 0.156757 | 0.241379 | 0.236364 |
| 2 | 0.077039 | 0.080189 | 0.087379 | 0.098093 | 0.100917 | 0.081081 | 0.034483 | 0.109091 |
genre_proportions_as_percentage = genre_proportions*100
genre_proportions_as_percentage
| ECCO | One-Footnote | One-Footnote (with footnotes) | Contextual | Translation | Intertextual | Intratextual | Metatextual | |
|---|---|---|---|---|---|---|---|---|
| 0 | 5.589124 | 6.536388 | 4.854369 | 3.269755 | 3.669725 | 4.324324 | 9.482759 | 7.272727 |
| 1 | 13.293051 | 12.533693 | 13.592233 | 9.264305 | 10.091743 | 15.675676 | 24.137931 | 23.636364 |
| 2 | 7.703927 | 8.018868 | 8.737864 | 9.809264 | 10.091743 | 8.108108 | 3.448276 | 10.909091 |
genre_proportions_tranposed = genre_proportions.transpose()
genre_proportions_tranposed
| 0 | 1 | 2 | |
|---|---|---|---|
| ECCO | 0.055891 | 0.132931 | 0.077039 |
| One-Footnote | 0.065364 | 0.125337 | 0.080189 |
| One-Footnote (with footnotes) | 0.048544 | 0.135922 | 0.087379 |
| Contextual | 0.032698 | 0.092643 | 0.098093 |
| Translation | 0.036697 | 0.100917 | 0.100917 |
| Intertextual | 0.043243 | 0.156757 | 0.081081 |
| Intratextual | 0.094828 | 0.241379 | 0.034483 |
| Metatextual | 0.072727 | 0.236364 | 0.109091 |
genre_proportions_tranposed.rename(columns={0: 'Memoirs', 1: 'Letters', 2: 'Tales'}, inplace=True)
genre_proportions_tranposed
| Memoirs | Letters | Tales | |
|---|---|---|---|
| ECCO | 0.055891 | 0.132931 | 0.077039 |
| One-Footnote | 0.065364 | 0.125337 | 0.080189 |
| One-Footnote (with footnotes) | 0.048544 | 0.135922 | 0.087379 |
| Contextual | 0.032698 | 0.092643 | 0.098093 |
| Translation | 0.036697 | 0.100917 | 0.100917 |
| Intertextual | 0.043243 | 0.156757 | 0.081081 |
| Intratextual | 0.094828 | 0.241379 | 0.034483 |
| Metatextual | 0.072727 | 0.236364 | 0.109091 |
genre_proportions_tranposed_as_percent = genre_proportions_tranposed*100
genre_proportions_tranposed_as_percent
| Memoirs | Letters | Tales | |
|---|---|---|---|
| ECCO | 5.589124 | 13.293051 | 7.703927 |
| One-Footnote | 6.536388 | 12.533693 | 8.018868 |
| One-Footnote (with footnotes) | 4.854369 | 13.592233 | 8.737864 |
| Contextual | 3.269755 | 9.264305 | 9.809264 |
| Translation | 3.669725 | 10.091743 | 10.091743 |
| Intertextual | 4.324324 | 15.675676 | 8.108108 |
| Intratextual | 9.482759 | 24.137931 | 3.448276 |
| Metatextual | 7.272727 | 23.636364 | 10.909091 |
genre_proportions_tranposed_as_percent.transpose()
| ECCO | One-Footnote | One-Footnote (with footnotes) | Contextual | Translation | Intertextual | Intratextual | Metatextual | |
|---|---|---|---|---|---|---|---|---|
| Memoirs | 5.589124 | 6.536388 | 4.854369 | 3.269755 | 3.669725 | 4.324324 | 9.482759 | 7.272727 |
| Letters | 13.293051 | 12.533693 | 13.592233 | 9.264305 | 10.091743 | 15.675676 | 24.137931 | 23.636364 |
| Tales | 7.703927 | 8.018868 | 8.737864 | 9.809264 | 10.091743 | 8.108108 | 3.448276 | 10.909091 |
# Let's run a chi2 goodness of fit test
stats.chisquare(f_obs=genre_proportions_tranposed['Memoirs'], f_exp=(genre_proportions_tranposed['Memoirs'].mean()))
Power_divergenceResult(statistic=0.05348566635013759, pvalue=0.9999997366379034)
# Let's run a chi2 goodness of fit test
stats.chisquare(f_obs=genre_proportions_tranposed_as_percent['Memoirs'][3:], f_exp=(genre_proportions_tranposed_as_percent['Memoirs'][3:].mean()))
Power_divergenceResult(statistic=5.113818122317871, pvalue=0.2758171494850087)
# Let's run a chi2 goodness of fit test
stats.chisquare(f_obs=genre_proportions_tranposed_as_percent['Tales'], f_exp=(genre_proportions_tranposed_as_percent['Tales'].mean()))
Power_divergenceResult(statistic=4.366475358551824, pvalue=0.7367304585784749)
# Let's run a chi2 goodness of fit test
stats.chisquare(f_obs=genre_proportions_tranposed_as_percent['Tales'][3:], f_exp=(genre_proportions_tranposed_as_percent['Tales'][3:].mean()))
Power_divergenceResult(statistic=4.215769975469303, pvalue=0.3775914223709852)
# Let's run a chi2 goodness of fit test
stats.chisquare(f_obs=genre_proportions_tranposed_as_percent['Letters'], f_exp=(genre_proportions_tranposed_as_percent['Letters'].mean()))
Power_divergenceResult(statistic=14.785427796717439, pvalue=0.03885040765985401)
# Let's run a chi2 goodness of fit test
stats.chisquare(f_obs=genre_proportions_tranposed_as_percent['Letters'][3:], f_exp=(genre_proportions_tranposed_as_percent['Letters'][3:].mean()))
Power_divergenceResult(statistic=12.278545490999921, pvalue=0.015395780036639222)
In this chi-squared test, our null hypothesis was an queaitable distribution of footntoe proportions The p-value for Letters is lower than 0.05, indicating that the distribution deviates from the expected outcome of equal proportions.
genre_proportions_tranposed['Letters'].mean()
0.15278124466812146
genre_proportions_tranposed['Tales'].mean()
0.08353392724794562
stats.ks_2samp(genre_proportions_tranposed['Memoirs'], genre_proportions_tranposed['Letters'])
KstestResult(statistic=0.875, pvalue=0.002486402486402486, statistic_location=0.07272727272727272, statistic_sign=1)
stats.ks_2samp(genre_proportions_tranposed['Memoirs'], genre_proportions_tranposed['Tales'])
KstestResult(statistic=0.75, pvalue=0.018648018648018645, statistic_location=0.07272727272727272, statistic_sign=1)
stats.ks_2samp(genre_proportions_tranposed['Letters'], genre_proportions_tranposed['Tales'])
KstestResult(statistic=0.75, pvalue=0.018648018648018645, statistic_location=0.10909090909090909, statistic_sign=-1)
genre_proportions_tranposed_one_fn = genre_proportions_tranposed.loc[['One-Footnote', 'Contextual', 'Translation', 'Intertextual', 'Intratextual','Metatextual']]
| Memoirs | Letters | Tales | |
|---|---|---|---|
| One-Footnote | 0.065364 | 0.125337 | 0.080189 |
| Contextual | 0.032698 | 0.092643 | 0.098093 |
| Translation | 0.036697 | 0.100917 | 0.100917 |
| Intertextual | 0.043243 | 0.156757 | 0.081081 |
| Intratextual | 0.094828 | 0.241379 | 0.034483 |
| Metatextual | 0.072727 | 0.236364 | 0.109091 |
genre_proportions_tranposed_one_fn_as_percent = genre_proportions_tranposed_one_fn*100
genre_proportions_tranposed_one_fn_as_percent
| Memoirs | Letters | Tales | |
|---|---|---|---|
| One-Footnote | 6.536388 | 12.533693 | 8.018868 |
| Contextual | 3.269755 | 9.264305 | 9.809264 |
| Translation | 3.669725 | 10.091743 | 10.091743 |
| Intertextual | 4.324324 | 15.675676 | 8.108108 |
| Intratextual | 9.482759 | 24.137931 | 3.448276 |
| Metatextual | 7.272727 | 23.636364 | 10.909091 |
stats.chisquare(f_obs=genre_proportions_tranposed_one_fn_as_percent['Memoirs'], f_exp=(genre_proportions_tranposed_one_fn_as_percent['Memoirs'].mean()))
Power_divergenceResult(statistic=5.101642785598843, pvalue=0.4036020303770298)
stats.chisquare(f_obs=genre_proportions_tranposed_one_fn_as_percent['Letters'], f_exp=(genre_proportions_tranposed_one_fn_as_percent['Letters'].mean()))
Power_divergenceResult(statistic=13.647924522688669, pvalue=0.018007499282828337)
stats.chisquare(f_obs=genre_proportions_tranposed_one_fn_as_percent['Tales'], f_exp=(genre_proportions_tranposed_one_fn_as_percent['Tales'].mean()))
Power_divergenceResult(statistic=4.2742848540647564, pvalue=0.5106385567502736)
stats.ks_2samp(genre_proportions_tranposed_one_fn_as_percent['Memoirs'], genre_proportions_tranposed_one_fn_as_percent['Letters'])
KstestResult(statistic=0.8333333333333334, pvalue=0.025974025974025972, statistic_location=7.2727272727272725, statistic_sign=1)
stats.ks_2samp(genre_proportions_tranposed_one_fn_as_percent['Memoirs'], genre_proportions_tranposed_one_fn_as_percent['Tales'])
KstestResult(statistic=0.6666666666666666, pvalue=0.14285714285714285, statistic_location=7.2727272727272725, statistic_sign=1)
stats.ks_2samp(genre_proportions_tranposed_one_fn_as_percent['Letters'], genre_proportions_tranposed_one_fn_as_percent['Tales'])
KstestResult(statistic=0.6666666666666666, pvalue=0.14285714285714285, statistic_location=10.909090909090908, statistic_sign=-1)
stats.ks_2samp(genre_proportions_tranposed['Memoirs'][2:], genre_proportions_tranposed['Letters'][2:])
KstestResult(statistic=0.8333333333333334, pvalue=0.025974025974025972, statistic_location=0.07272727272727272, statistic_sign=1)
stats.ks_2samp(genre_proportions_tranposed['Memoirs'][1:], genre_proportions_tranposed['Tales'][1:])
KstestResult(statistic=0.7142857142857143, pvalue=0.05303030303030303, statistic_location=0.07272727272727272, statistic_sign=1)
stats.ks_2samp(genre_proportions_tranposed['Letters'][1:], genre_proportions_tranposed['Tales'][1:])
KstestResult(statistic=0.7142857142857143, pvalue=0.05303030303030303, statistic_location=0.10909090909090909, statistic_sign=-1)
What tags appear with other tags?
multi_tagged_footnotes
| Count | Tag | |
|---|---|---|
| 0 | 103 | FN, C |
| 1 | 91 | CW |
| 2 | 82 | C |
| 3 | 64 | C, T |
| 4 | 51 | IT |
| ... | ... | ... |
| 404 | 1 | LB, CW |
| 405 | 1 | CW, PO |
| 406 | 1 | LB, TY, CW, SM |
| 407 | 1 | BQ, P, CW, TY, PB |
| 408 | 1 | C, IT, IN, MT |
409 rows × 2 columns
# Let's look at the CW tag
pd.set_option('display.max_rows', 500)
multi_tagged_footnotes[multi_tagged_footnotes['Tag'].astype(str).str.contains("CW")].Tag.value_counts()
BQ, P, CW, TY 13 BQ, P, CW, TY, SM 11 BQ, P, CW, TY, PB 11 CW, PB, TY, O 7 CW, PB, TY 6 SM, PB, CW 6 LB, P, TY, CW 5 CW, LB, TY 5 TY, CW 5 SM, CW, TY, O 4 PB, SM, CW, TY 4 SE, SM, CW 4 CW, SM, TY 4 RQ, CW 3 SM, CW 3 M, CW 3 BQ, CW, TY 3 BQ, P, CW, SM 3 TE, CW, TY 3 CW, TY, O 3 SE, CW 3 LB, SM, CW 3 LS, CW 3 CW, PO, TY 3 CW, IN 2 CW, P, TY 2 LB, TY, CW, SM 2 SM, CW, LB, P, TY 2 BQ, P, CW, SM, TY, O 2 PB, P, CW 2 BQ, CW, TY, PB 2 LB, SE, CW 2 CW, SD 2 D, CW, TY, PB 2 CW, D 2 CW, D, TY 2 BQ, CW 2 CW 2 BQ, P, CW 2 BQ, CW, SM, TY 2 CW, PB 2 CW, PO 2 LB, CW 2 P, LB, RQ, CW 2 RQ, LB, SM, CW 1 LB, CW,NN 1 CW, BQ, P, TY, M 1 CW, LB ,NN 1 PB, TY, PO, CW, O 1 SM, CW,TY, LB 1 SM, TE, CW, TY 1 PO, CW, TY, SM 1 CW, TY, M 1 RQ, LB, CW 1 LS, CW, TY 1 CW, P 1 FN, CW, SM,IT 1 D, LB, CW 1 M, CW, SM 1 BQ, P, TY, CW, SM, PB 1 BP, P, TY, CW 1 LB, P, IT, CW 1 BQ, P, SM, CW, PB 1 RQ, LB, CW, TY 1 PB, CW, TY, PO 1 CH, CW, TY 1 BQ, P, LB, CW 1 I, CW 1 RQ, BQ, TY, P, SM, CW 1 PO, SM, CW 1 SM, CW, BQ 1 TE, CW, LB, TY 1 P, TY, BQ, CW, PB, O 1 CW, C, T 1 PO, SM, CW, TY, PB 1 TY, SE, CW, LB 1 CW, LS,NN 1 P, TY, CW, SM, BQ, LB 1 TE, CW, SM 1 CW, SD, TY 1 RQ, TY, CW 1 CW, PE 1 CW, O 1 CW, TY, BQ, P, PO, PB, O 1 LS, CW, PB 1 SM, CW, PB, O 1 P, LB, SM, CW 1 P, LB, CW 1 SM, CW, O 1 LB, TY, CW,NN 1 PO, TY, P, BQ, CW 1 BQ, LB, CW,NN 1 P, SM, CW, TY 1 CW, C, IT 1 I, O, CW 1 RQ, P, LB, CW, SM 1 RQ, SM, CW, SE, PE 1 PB, C, T, IT, CW 1 CW, D, SE 1 SM, CW, BQ, PB, O 1 SE, SM, CW,NN 1 BQ, P, LB, CW, SM 1 LB, CW,TY,NN 1 BQ, PB, CW 1 RQ, SE, CW 1 CW, PB, O 1 CW, PB, SE, TY 1 LB, CW,TY 1 Name: Tag, dtype: int64
multi_tagged_footnotes[multi_tagged_footnotes['Tag'].astype(str).str.contains("CW")].Tag.value_counts().plot(kind="barh", title="What tags appear with the tag 'CW'?", figsize=(10, 20))\
.set_ylabel("Tags")
plt.legend(["Number of volumes"])
<matplotlib.legend.Legend at 0x141b4a710>
# Let's look at the TY tag
pd.set_option('display.max_rows', 500)
multi_tagged_footnotes[multi_tagged_footnotes['Tag'].astype(str).str.contains("TY")].Tag.value_counts()
BQ, P, CW, TY 13 BQ, P, CW, TY, PB 11 BQ, P, CW, TY, SM 11 CW, PB, TY, O 7 BQ, P, TY 7 CW, PB, TY 6 TY, CW 5 LB, P, TY, CW 5 CW, LB, TY 5 PB, SM, CW, TY 4 SM, CW, TY, O 4 LS, TY, TE, PB 4 LB, P, TY 4 CW, SM, TY 4 O, TE, PO, TY 3 CW, PO, TY 3 O, TY, PB 3 CW, TY, O 3 LB, TY, SM 3 BQ, CW, TY 3 TY, LB 3 TE, CW, TY 3 SE , TY 2 TE, TY 2 PB, TY, TE 2 LB, TY, CW, SM 2 CW, D, TY 2 BQ, TY 2 RQ, BQ, TY, P 2 BQ, P, CW, SM, TY, O 2 LB, LS, TY 2 BQ, P, TY, LB 2 SM, CW, LB, P, TY 2 SM, TY 2 BQ, CW, TY, PB 2 BQ, CW, SM, TY 2 O, TY, TE 2 TE, LS, TY 2 O, TY 2 TY, LB, NN 2 CW, P, TY 2 D, CW, TY, PB 2 BQ, P, TY, SM 2 BQ, P, TY, TE 2 PB, TY 2 RQ, LB, CW, TY 1 TE, TY, LB 1 TY, P, PB, TE, LS 1 CH, CW, TY 1 BP, P, TY, CW 1 LS, CW, TY 1 SM, TY, O 1 TE, TY, LB, P 1 BQ, P, TY, CW, SM, PB 1 CW, BQ, P, TY, M 1 TE, PB, P, O, TY 1 SE, SM, D, PB, TY 1 PB, TY, PO, CW, O 1 SM, TE, CW, TY 1 TY, PB, D 1 TE, TY, LD, SE 1 PB, CW, TY, PO 1 RQ, BQ, TY, P, SM, CW 1 SM, CW,TY, LB 1 SM, TY, PB 1 LS, LB, TY,NN 1 O, TY, PO 1 SE, TY, LB 1 PO, PB, TY, O 1 BQ, TY, LB, T 1 TE. LS, TY, LB 1 P, TY, SM 1 BQ, P, TY,NN 1 PO, LB, TY 1 CW, TY, M 1 TY 1 TE, PO, TY 1 TE, TY, PB, O 1 LS, TE, TY, PB, O 1 CW, TY, BQ, P, PO, PB, O 1 RQ, TY, CW 1 CW, SD, TY 1 TE, SE, TY 1 P, TY, CW, SM, BQ, LB 1 TE, TY, M 1 TY,BQ, O 1 TY, SE, CW, LB 1 PO, SM, CW, TY, PB 1 LB, TY, CW,NN 1 PO, TY, P, BQ, CW 1 CW, PB, SE, TY 1 TE, LS, P, TY 1 TE, CW, LB, TY 1 P, BQ, TY, PB 1 O, LS, TY 1 LB, CW,TY,NN 1 BQ , TY,NN 1 P, SM, CW, TY 1 P, TY, BQ, CW, PB, O 1 TE, TY, SM 1 TY, LB, P,NN 1 TY, PB, SM, O 1 RQ,TY, LB,NN 1 PO, CW, TY, SM 1 LB, CW,TY 1 Name: Tag, dtype: int64
multi_tagged_footnotes[multi_tagged_footnotes['Tag'].astype(str).str.contains("TY")].Tag.value_counts().plot(kind="barh", title="What tags appear with the tag 'TY?'", figsize=(10, 20))\
.set_ylabel("Tags")
plt.legend(["Number of volumes"])
<matplotlib.legend.Legend at 0x141037990>
# Let's look at the PB tag
pd.set_option('display.max_rows', 500)
multi_tagged_footnotes[multi_tagged_footnotes['Tag'].astype(str).str.contains("PB")].Tag.value_counts()
BQ, P, CW, TY, PB 11 CW, PB, TY, O 7 CW, PB, TY 6 SM, PB, CW 6 LS, TY, TE, PB 4 PB, SM, CW, TY 4 O, TY, PB 3 BQ, CW, TY, PB 2 PB, TY, TE 2 D, CW, TY, PB 2 PB, P, CW 2 PB, PO 2 CW, PB 2 PB, TY 2 TY, PB, SM, O 1 LB, PB 1 PO, PB, TY, O 1 SM, TY, PB 1 PB, P 1 TE, PB, P, O, TY 1 BQ, P, SM, CW, PB 1 BQ, P, TY, CW, SM, PB 1 TY, P, PB, TE, LS 1 SE, SM, D, PB, TY 1 LB, PB, SM 1 PB, CW, TY, PO 1 TY, PB, D 1 BQ, PB, CW 1 P, TY, BQ, CW, PB, O 1 CW, TY, BQ, P, PO, PB, O 1 TE, TY, PB, O 1 PB 1 LS, TE, TY, PB, O 1 SM, CW, PB, O 1 RQ, PB 1 LS, CW, PB 1 PO, SM, CW, TY, PB 1 PB, C, T, IT, CW 1 CW, PB, SE, TY 1 CW, PB, O 1 P, BQ, TY, PB 1 O, PB 1 PO, SM, PB 1 SM, CW, BQ, PB, O 1 PB, TY, PO, CW, O 1 Name: Tag, dtype: int64
multi_tagged_footnotes[multi_tagged_footnotes['Tag'].astype(str).str.contains("PB")].Tag.value_counts().plot(kind="barh", title="What tags appear with the tag 'PB'?", figsize=(10, 15))\
.set_ylabel("Tags")
plt.legend(["Number of volumes"])
<matplotlib.legend.Legend at 0x1439fe190>
# Let's look at the BQ tag
pd.set_option('display.max_rows', 500)
multi_tagged_footnotes[multi_tagged_footnotes['Tag'].astype(str).str.contains("BQ")].Tag.value_counts()
BQ, P, CW, TY 13 BQ, P, CW, TY, PB 11 BQ, P, CW, TY, SM 11 BQ, P, TY 7 BQ, P, CW, SM 3 BQ, CW, TY 3 BQ, CW 2 BQ, CW, TY, PB 2 BQ, P, TY, LB 2 RQ, BQ, TY, P 2 BQ, P, CW, SM, TY, O 2 BQ, TY 2 BQ, P, TY, TE 2 BQ, P, TY, SM 2 BQ, CW, SM, TY 2 BQ, P, CW 2 RQ, BQ, P 1 CW, BQ, P, TY, M 1 RQ, BQ, TY, P, SM, CW 1 BQ, P, LB, CW 1 BQ, TY, LB, T 1 BQ, P, SM, CW, PB 1 BQ, P, TY, CW, SM, PB 1 BQ, NY, LB 1 BQ, SE,NN 1 BQ, P, SE 1 BQ, PB, CW 1 SM, CW, BQ 1 P, TY, BQ, CW, PB, O 1 SM, CW, BQ, PB, O 1 BQ, P, LB, CW, SM 1 BQ , TY,NN 1 BQ, P, TY,NN 1 P, BQ, TY, PB 1 BQ, LB, CW,NN 1 PO, TY, P, BQ, CW 1 TY,BQ, O 1 P, TY, CW, SM, BQ, LB 1 BQ 1 CW, TY, BQ, P, PO, PB, O 1 BQ, P 1 BQ, LB 1 Name: Tag, dtype: int64
multi_tagged_footnotes[multi_tagged_footnotes['Tag'].astype(str).str.contains("BQ")].Tag.value_counts().plot(kind="barh", title="What tags appear with the tag 'BQ'?'", figsize=(10, 10))\
.set_ylabel("Tags")
plt.legend(["Number of volumes"])
<matplotlib.legend.Legend at 0x143e16890>
# Let's look at the SM tag
pd.set_option('display.max_rows', 500)
multi_tagged_footnotes[multi_tagged_footnotes['Tag'].astype(str).str.contains("SM")].Tag.value_counts()
BQ, P, CW, TY, SM 11 SM, PB, CW 6 CW, SM, TY 4 SM, CW, TY, O 4 SE, SM, CW 4 PB, SM, CW, TY 4 SM, CW 3 LB, TY, SM 3 LB, SM, CW 3 BQ, P, CW, SM 3 BQ, P, TY, SM 2 BQ, P, CW, SM, TY, O 2 LB, TY, CW, SM 2 SM, CW, LB, P, TY 2 SM, TY 2 BQ, CW, SM, TY 2 TY, PB, SM, O 1 P, TY, SM 1 FN, CW, SM,IT 1 BQ, P, TY, CW, SM, PB 1 SM, TE, CW, TY 1 BQ, P, SM, CW, PB 1 SM, TY, O 1 SM, TY, PB 1 D, SM 1 PO, SM, CW 1 M, CW, SM 1 RQ, BQ, TY, P, SM, CW 1 RQ, LB, SM, CW 1 SM, CW,TY, LB 1 LB, PB, SM 1 SE, SM, D, PB, TY 1 PO, CW, TY, SM 1 SM, CW, BQ, PB, O 1 SM, CW, BQ 1 SE, SM, CW,NN 1 SM, CW, O 1 SM, O 1 P, LB, SM, CW 1 SM, CW, PB, O 1 TE, CW, SM 1 P, TY, CW, SM, BQ, LB 1 PO, SM, CW, TY, PB 1 PO, SM, PB 1 TE, TY, SM 1 SM, LB 1 BQ, P, LB, CW, SM 1 SM 1 P, SM, CW, TY 1 RQ, SM, CW, SE, PE 1 RQ, P, LB, CW, SM 1 LS, SM 1 SE, SM, P,NN 1 Name: Tag, dtype: int64
multi_tagged_footnotes[multi_tagged_footnotes['Tag'].astype(str).str.contains("SM")].Tag.value_counts().plot(kind="barh", title="What tags appear with the tag 'SM?'", figsize=(10, 10))\
.set_ylabel("Tags")
plt.legend(["Number of volumes"])
<matplotlib.legend.Legend at 0x143f3cc90>
# Let's check how 'CW' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'CW'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'CW' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'CW'"])
<matplotlib.legend.Legend at 0x1436cbf90>
# Let's check how 'SM' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'SM'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'SM' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'SM'"])
<matplotlib.legend.Legend at 0x1440d9290>
# Let's check how 'TE' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'TE'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'TE' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'TE'"])
<matplotlib.legend.Legend at 0x144164c90>
# Let's check how 'TE' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'TY'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'TY' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'TY'"])
<matplotlib.legend.Legend at 0x143ff0650>
# Let's check how 'PB' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'PB'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'PB' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'PB'"])
<matplotlib.legend.Legend at 0x144296f90>
# Let's check how 'BQ' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'BQ'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'BQ' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'BQ'"])
<matplotlib.legend.Legend at 0x14429eed0>
# Let's check how 'P' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'P'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'P' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'P'"])
<matplotlib.legend.Legend at 0x144347cd0>
# Let's check how 'D' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'D'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'D' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'D'"])
<matplotlib.legend.Legend at 0x14444dbd0>
# Let's check how 'PO' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'PO'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'PO' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'PO'"])
<matplotlib.legend.Legend at 0x144467410>
# Let's check how 'LS' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'LS'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'LS' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'LS'"])
<matplotlib.legend.Legend at 0x144508b10>
# Let's check how 'SE' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'SE'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'SE' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'SE'"])
<matplotlib.legend.Legend at 0x1445d3510>
# Let's check how 'PE' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'PE'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'PE' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'PE'"])
<matplotlib.legend.Legend at 0x14464a990>
# Let's check how the 'O' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'O'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'O' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'O'"])
<matplotlib.legend.Legend at 0x1446ecc90>
# Let's check how the 'M' tag is distributed in our corpus
ax = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['tag'] == 'M'].groupby('decade')[['tag']].count().plot(kind="bar", figsize=(10, 5), title="How many 'M' tags are in our one-footnote dataset, by decade?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
ax.set_xlabel("Decade")
ax.set_ylabel("Number of volumes")
plt.legend(["Volumes with tag 'M'"])
<matplotlib.legend.Legend at 0x1446a2ed0>
volumes_of_1750s = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['decade'] == 1750]
ax = volumes_of_1750s.groupby('tag')['Title'].count().plot(kind='bar', title="What are the tags for 1750s volumes?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
volumes_of_1760s = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['decade'] == 1760]
ax = volumes_of_1760s.groupby('tag')['Title'].count().plot(kind='bar', title="What are the tags for 1760s volumes?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
volumes_of_1770s = one_footnote_tagged_MR_df[one_footnote_tagged_MR_df['decade'] == 1770]
ax = volumes_of_1770s.groupby('tag')['Title'].count().plot(kind='bar', title="What are the tags for 1770s volumes?")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
Where within novels do instances of single footnotes appear? And in the case of multi-volume works, does our dataset contain more instances of footnotes in certain volumes?
## (We'll need to exclude intsancew where there are 2, 3, 4, or 5 footnotes on a single page)
print('Number of volumes in our one-footnote dataset with exactly one footnote:')
one_footnote_df[one_footnote_df['footnotes_present'] == 1].footnotes_present.count()
Number of volumes in our one-footnote dataset with exactly one footnote:
582
print('Where do single footnotes appear inv various volumes?')
one_footnote_df[one_footnote_df['footnotes_present'] == 1].Vol_Number.value_counts()
Where do single footnotes appear inv various volumes?
0 231 Volume 2 141 Volume 1 122 Volume 3 35 Volume 4 18 Volume 5 17 Volume 6 7 Volume 7 5 Volume 8 3 Volume 12 1 Volume 22 1 Volume 15 1 Name: Vol_Number, dtype: int64
# Turn the table above into a dataframe
vol_dist_df = one_footnote_df[one_footnote_df['footnotes_present'] == 1].Vol_Number.value_counts()
# Plot our single footnotes data as a pie chart
ax = vol_dist_df.plot(kind='pie', autopct='%.2f', labels=None, figsize=(11,6), title="Where in novels do single footnotes appear?")
ax.legend(loc=1, labels=vol_dist_df.index)
<matplotlib.legend.Legend at 0x144aa3fd0>
# Plot our single footnotes data as bar chart
ax = vol_dist_df.plot(kind='bar', figsize=(11,6), title="Where in novels do single footnotes appear?")
ax.set_xlabel("Volume Number")
ax.set_ylabel("Number of footnotes that appear in a given volume")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
Let's look at the same dataset, but filter out all the single-volume works, which appear as "0" in our dataset.
#Let's drop the 0 values
multi_vol_dist_df = vol_dist_df[1:]
print('Where do single footnoes appear inv various volumes?')
multi_vol_dist_df
Where do single footnoes appear inv various volumes?
Volume 2 141 Volume 1 122 Volume 3 35 Volume 4 18 Volume 5 17 Volume 6 7 Volume 7 5 Volume 8 3 Volume 12 1 Volume 22 1 Volume 15 1 Name: Vol_Number, dtype: int64
# Plot our single footnotes data as a pie chart
ax = multi_vol_dist_df.plot(kind='pie', autopct='%.2f', labels=None, figsize=(11,6), title="Where in novels do single footnotes appear? (multi-volume works only)")
ax.legend(loc=1, labels=multi_vol_dist_df.index)
<matplotlib.legend.Legend at 0x144a84410>
# Plot our single footnotes data as bar chart
ax = multi_vol_dist_df.plot(kind='bar', figsize=(11,6), title="Where in novels do single footnotes appear? (multi-volume works only)")
ax.set_xlabel("Volume Number")
ax.set_ylabel("Number of footnotes that appear in a given volume")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
# Let's compare these distributions with the general distribution of volumes
# What is the distribution of volumes in our one-footnote corpus that RB and NA have verfied have footnotes present?
print("Distriubtion of volumes in our one-footnote subset with footnotes present")
print ("(includes volumes that RB and NA have verfied DO have footnotes present, '0' = single volume):")
vol_dist_df
Distriubtion of volumes in our one-footnote subset with footnotes present (includes volumes that RB and NA have verfied DO have footnotes present, '0' = single volume):
0 231 Volume 2 141 Volume 1 122 Volume 3 35 Volume 4 18 Volume 5 17 Volume 6 7 Volume 7 5 Volume 8 3 Volume 12 1 Volume 22 1 Volume 15 1 Name: Vol_Number, dtype: int64
# Now let's compare that to the one-footnote subset as a whole
# (this includes volumes identified by computer vision as having footnotes that did not in facct have footnotes
print("Distriubtion of volumes in our one-footnote subset")
print("(includes ALL volumes identified by algorithm):")
one_footnote_df.Vol_Number.value_counts()
Distriubtion of volumes in our one-footnote subset (includes ALL volumes identified by algorithm):
0 537 Volume 2 370 Volume 1 332 Volume 3 124 Volume 4 53 Volume 5 32 Volume 6 11 Volume 7 10 Volume 8 8 Volume 9 3 Volume 12 1 Volume 22 1 Volume 10 1 Volume 15 1 Name: Vol_Number, dtype: int64
print("Distriubtion of volumes in ECCO footnote corpus NOT in one-footnote subset:")
footnotes_df[footnotes_df['fn_pages'] != 1].Vol_Number.value_counts()
Distriubtion of volumes in ECCO footnote corpus NOT in one-footnote subset:
0 3247 Volume 1 1361 Volume 2 1305 Volume 3 489 Volume 4 331 Volume 5 111 Volume 6 78 Volume 7 58 Volume 8 41 Volume 9 17 Volume 10 16 Volume 12 10 Volume 11 10 Volume 14 7 Volume 16 7 Volume 13 6 Volume 17 6 Volume 15 5 Volume 18 5 Volume 19 4 Volume 20 2 Volume 21 2 Volume 23 2 Volume 22 1 Volume 24 1 Name: Vol_Number, dtype: int64
ax = footnotes_df.Vol_Number.value_counts().plot(kind='pie', autopct='%.2f', labels=None, figsize=(11,6), title="What is the distribution of volumes in our corpus?")
ax.legend(loc=1, labels=footnotes_df.Vol_Number.value_counts().index)
<matplotlib.legend.Legend at 0x144b4e890>
Because we don't want to compare the one-footnote subset to the whole footnotes dataframe (which would muck up any attempt to run statistical tests to see if there's evidence of a difference, we're going to compare the distribution of volumes in our one footnote dataset to the corpus as a whole
# Let's make sure mark the absence of footnotes in volumes 9-11, 13, 14, 16-21, 23, 24)
vols_dist_in_one_footnote_corpus_with_footnotes_present = vol_dist_df
vols_dist_in_footnotes_corpus_excluding_one_footnote_subset = footnotes_df[footnotes_df['fn_pages'] != 1].Vol_Number.value_counts()
vols_dist_in_one_footnote_corpus_with_footnotes_present_series = pd.Series(vols_dist_in_one_footnote_corpus_with_footnotes_present)
vols_dist_in_footnotes_corpus_excluding_one_footnote_subset_series = pd.Series(vols_dist_in_footnotes_corpus_excluding_one_footnote_subset)
test_1 = vols_dist_in_one_footnote_corpus_with_footnotes_present
test_1.append(pd.Series(0, index=['Volume 9']))
test_1 = test_1.append(pd.Series(0, index=['Volume 9']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/1437714004.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1.append(pd.Series(0, index=['Volume 9'])) /var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/1437714004.py:2: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 9']))
test_1 = test_1.append(pd.Series(0, index=['Volume 10']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/2812324794.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 10']))
test_1 = test_1.append(pd.Series(0, index=['Volume 11']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/3196432227.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 11']))
test_1 = test_1.append(pd.Series(0, index=['Volume 13']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/3833397998.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 13']))
test_1 = test_1.append(pd.Series(0, index=['Volume 14']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/2755060057.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 14']))
test_1 = test_1.append(pd.Series(0, index=['Volume 16']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/3855072254.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 16']))
test_1 = test_1.append(pd.Series(0, index=['Volume 17']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/952062530.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 17']))
test_1 = test_1.append(pd.Series(0, index=['Volume 18']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/2656581165.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 18']))
test_1 = test_1.append(pd.Series(0, index=['Volume 19']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/1036213031.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 19']))
test_1 = test_1.append(pd.Series(0, index=['Volume 20']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/1544008085.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 20']))
test_1 = test_1.append(pd.Series(0, index=['Volume 21']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/2714299107.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 21']))
test_1 = test_1.append(pd.Series(0, index=['Volume 23']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/363136250.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 23']))
test_1 = test_1.append(pd.Series(0, index=['Volume 24']))
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/280527973.py:1: FutureWarning: The series.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. test_1 = test_1.append(pd.Series(0, index=['Volume 24']))
vols_dist_in_one_footnote_corpus_with_footnotes_present = test_1
The standard threshold for confidence in variance is a p-value of < 0.05
If the p-value:
#f_val, p_val = stats.f_oneway(vols_dist_in_one_footnote_corpus_with_footnotes_present, vols_dist_in_footnotes_corpus_excluding_one_footnote_subset)
#print("One-way ANOVA test comparing one-footnote subset with ECCO footnotes corpus")
#print("p-value:", p_val)
We have weak statistical evidence. If we set our standard of significance at 5%, this would fall below statistical significance. If we set level of significance at 10%, then this would be statistically significant.
This is just another way of running the t-test (which the ANOVA test is a version of). Again, standard threshold for difference is 0.05.
stats.ttest_ind(vols_dist_in_footnotes_corpus_excluding_one_footnote_subset, vols_dist_in_one_footnote_corpus_with_footnotes_present)
Ttest_indResult(statistic=1.1260904278537194, pvalue=0.2677895775992684)
If the correltation coefficient is close to +1: Large Positive relationship, close to -1: Large Negative relationship is close to 0: No relationship.
vols_dist_in_footnotes_corpus_excluding_one_footnote_subset = footnotes_df[footnotes_df['fn_pages'] != 1].Vol_Number.value_counts()
#vols_dist_in_one_footnote_corpus_with_footnotes_present = vol_dist_df
print("Correlation coefficient and p-value comparing one-footnote subset with ECCO footnotes corpus:")
stats.pearsonr(vols_dist_in_footnotes_corpus_excluding_one_footnote_subset, vols_dist_in_one_footnote_corpus_with_footnotes_present)
Correlation coefficient and p-value comparing one-footnote subset with ECCO footnotes corpus:
PearsonRResult(statistic=0.9853346738182929, pvalue=3.628243739690664e-19)
vols_dist_in_one_footnote_corpus_with_footnotes_present_series = pd.Series(vols_dist_in_one_footnote_corpus_with_footnotes_present)
vols_dist_in_footnotes_corpus_excluding_one_footnote_subset_series = pd.Series(vols_dist_in_footnotes_corpus_excluding_one_footnote_subset)
array1 = np.array([vols_dist_in_one_footnote_corpus_with_footnotes_present_series])
array2 = np.array([vols_dist_in_footnotes_corpus_excluding_one_footnote_subset_series])
final_array = np.concatenate((array1, array2))
print(final_array)
[[ 231 141 122 35 18 17 7 5 3 1 1 1 0 0
0 0 0 0 0 0 0 0 0 0 0]
[3247 1361 1305 489 331 111 78 58 41 17 16 10 10 7
7 6 6 5 5 4 2 2 2 1 1]]
chi2, p, dof, expected = chi2_contingency(final_array)
print("Chi2 Test for volumes distribution in general footnotes corpus and one-footnote subset")
print(f"chi2 statistic: {chi2:.5g}")
print(f"p-value: {p:.5g}")
print(f"degrees of freedom: {dof}")
print("expected frequencies:")
print(expected)
Chi2 Test for volumes distribution in general footnotes corpus and one-footnote subset chi2 statistic: 27.906 p-value: 0.26405 degrees of freedom: 24 expected frequencies: [[2.62746106e+02 1.13468847e+02 1.07802960e+02 3.95856698e+01 2.63652648e+01 9.66978193e+00 6.42133956e+00 4.75934579e+00 3.32398754e+00 1.35981308e+00 1.28426791e+00 8.30996885e-01 7.55451713e-01 5.28816199e-01 5.28816199e-01 4.53271028e-01 4.53271028e-01 3.77725857e-01 3.77725857e-01 3.02180685e-01 1.51090343e-01 1.51090343e-01 1.51090343e-01 7.55451713e-02 7.55451713e-02] [3.21525389e+03 1.38853115e+03 1.31919704e+03 4.84414330e+02 3.22634735e+02 1.18330218e+02 7.85786604e+01 5.82406542e+01 4.06760125e+01 1.66401869e+01 1.57157321e+01 1.01690031e+01 9.24454829e+00 6.47118380e+00 6.47118380e+00 5.54672897e+00 5.54672897e+00 4.62227414e+00 4.62227414e+00 3.69781931e+00 1.84890966e+00 1.84890966e+00 1.84890966e+00 9.24454829e-01 9.24454829e-01]]
one_footnote_present = one_footnote_df[one_footnote_df['footnotes_present'] == 1]
one_footnote_present[one_footnote_present['Vol_Number'] == 'Volume 2']
| Unnamed: 0 | Filename | DocumentID | ESTC_ID | Date | Title | Vol_Number | Author | Imprint | Field_Headings | fn_pages | nofn_pages | TableName | fn.percent | decade | footnotes_present | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10 | 476 | 0000500202.xml | 500202 | T131172 | 1765 | Lettres de Sophie et du Chevalier de **, pour servir de suppl<c3><a9>ment aux Lettres du Marquis... | Volume 2 | Desfontaines, M. (Guillaume Fran<c3><a7>ois Fouques Deshayes) | A Londres : et se trouvent <c3><a0> Paris, chez L'Esclapart, Libraire, Quai de G<c3><aa>vres, M.... | Epistolary fiction, French, Early works to 1800 | 1 | 278 | Manifest_LitAndLang1 | 0.003584 | 1760 | 1 |
| 13 | 483 | 0000500702.xml | 500702 | T131197 | 1759 | Female banishment: or, the woman hater. Originally wrote by the Chevalier de Mouhy, ... In two v... | Volume 2 | Mouhy, Charles de Fieux, chevalier de | London : printed for T. Lownds, 1759. | Exile (Punishment), Early works to 1800, Women, Early works to 1800 | 1 | 296 | Manifest_LitAndLang1 | 0.003367 | 1750 | 1 |
| 25 | 524 | 0000900502.xml | 900502 | T129718 | 1794 | The confessions of James Baptiste Couteau, citizen of France, written by himself: and translated... | Volume 2 | Jephson, Robert | London : printed for J. Debrett, 1794. | France, History, Revolution, 1789-1799, Fiction | 1 | 239 | Manifest_LitAndLang1 | 0.004167 | 1790 | 1 |
| 30 | 543 | 0001100402.xml | 1100402 | T100068 | 1769 | The farmer's son of Kent. A tale. In two volumes. ... | Volume 2 | Anon | London : printed for Francis Noble, at his Circulating Library near Middle Row, Holborn; and Joh... | English fiction, 18th century | 1 | 234 | Manifest_LitAndLang1 | 0.004255 | 1760 | 1 |
| 34 | 564 | 0001300602.xml | 1300602 | T098006 | 1767 | Le ministre de Wakefield, histoire suppos<c3><a9>e <c3><a9>crite par lui-m<c3><aa>me. ... | Volume 2 | Goldsmith, Oliver | Londres [i.e. Paris?] : chez Pissot, Desaint, 1767. | France, History, 18th century, France, Politics and government, 18th century | 1 | 240 | Manifest_LitAndLang1 | 0.004149 | 1760 | 1 |
| 49 | 634 | 0005300302.xml | 5300302 | T072280 | 1753 | The life and strange surprising adventures of Robinson Crusoe; of York, mariner: who lived eight... | Volume 2 | Defoe, Daniel | London : printed for T. and T. Longman, C. Hitch and L. Hawes, J. Hodges, B. Dod, J. and J. Rivi... | Survival after airplane accidents, shipwrecks, etc., Fiction | 1 | 282 | Manifest_LitAndLang1 | 0.003534 | 1750 | 1 |
| 53 | 645 | 0005400502.xml | 5400502 | T092965 | 1762 | The country seat; or, summer evenings entertainments. Translated from the French. In two volumes... | Volume 2 | Anon | London : printed: and sold by T. Lownds in Fleet Street, MDCCLXII. [1762]. | French fiction, 18th century, Translations into English | 1 | 233 | Manifest_LitAndLang1 | 0.004274 | 1760 | 1 |
| 56 | 658 | 0005500302.xml | 5500302 | T129707 | 1792 | Cyanna of Athens. A Grecian romance. In two volumes. ... From the French of the Baron de Bilderbec. | Volume 2 | Bilderbeck, Ludwig Benedict Franz, Freiherr von | London : printed for T. Axtell, 1792. | Greek fiction, Translations into English, 18th century | 1 | 180 | Manifest_LitAndLang1 | 0.005525 | 1790 | 1 |
| 69 | 5118 | 0006001102.xml | 6001102 | T093334 | 1795 | Memoirs of Madame de Barneveldt. Translated from the French by Miss Gunning. In two volumes. ... | Volume 2 | Auvigny, M. d' (Jean Du Castre) | London : printed by and for S. Low, Berwick Street, Soho; and E. Booker, New Bond Street, 1795. | Auvigny,, M. d, (Jean Du Castre),, 1712-1743, Biography, Early works to 1800 | 1 | 328 | Manifest_LitAndLang2 | 0.003040 | 1790 | 1 |
| 79 | 778 | 0006500102.xml | 6500102 | T138418 | 1797 | James the fatalist and his master. Translated from the French of Diderot. In three volumes. ... | Volume 2 | Diderot, Denis | London : printed for G. G. and J. Robinson, 1797. | Free will and determinism, Fiction | 1 | 295 | Manifest_LitAndLang1 | 0.003378 | 1790 | 1 |
| 88 | 5123 | 0008900202.xml | 8900202 | T097240 | 1735 | The skimmer: or the history of Tanzai and Neadarne. ... | Volume 2 | Cr<c3><a9>billon, Claude Prosper Jolyot de | London : printed for F. Galicke, near Temple-Bar, (M.DCC.XXXV.) [1735] | English fiction, 18th century | 1 | 144 | Manifest_LitAndLang2 | 0.006897 | 1730 | 1 |
| 109 | 948 | 0012400202.xml | 12400202 | T057349 | 1761 | The history of James Lovegrove, Esq; In four books. ... | Volume 2 | Morell, Charles, Sir | London : printed for John Wilkie, at the Bible in St. Paul's Church-Yard, MDCCLXI. [1761]. | English fiction, 18th century | 1 | 309 | Manifest_LitAndLang1 | 0.003226 | 1760 | 1 |
| 110 | 950 | 0012400302.xml | 12400302 | T066389 | 1787 | Reuben, or, the suicide. In two volumes. Se piangesse il meschin se sospirasse, Pensal tu che pe... | Volume 2 | Anon | London : printed for W. T. Swift, Charles-Street, St. James's-Square ; and J. Bew, Paternoster-R... | Epistolary fiction, English, Early works to 1800 | 1 | 186 | Manifest_LitAndLang1 | 0.005348 | 1780 | 1 |
| 118 | 972 | 0012600302.xml | 12600302 | T070705 | 1800 | Serena. A novel. In three volumes. By Catharine Selden, Author of the English Nun, &c. ... | Volume 2 | Selden, Catharine | London : printed at the Minerva-Press, for William Lane, Leadenhall-Street, 1800. | English fiction, 18th century | 1 | 216 | Manifest_LitAndLang1 | 0.004608 | 1800 | 1 |
| 130 | 1041 | 0013200202.xml | 13200202 | T107742 | 1790 | Delia, a pathetic and interesting tale. In Four Volumes ... | Volume 2 | Pilkington, Mrs. (Mary) | London : printed for William Lane, Leadenhall Street, 1790. | Epistolary fiction, English, Early works to 1800 | 1 | 278 | Manifest_LitAndLang1 | 0.003584 | 1790 | 1 |
| 133 | 1054 | 0013300202.xml | 13300202 | T064750 | 1784 | Original love-letters, betw[een] a lady of quality and a person of inferior station. ... | Volume 2 | Combe, William | Dublin : printed by J. Rea, for Messrs. Moncrieffe, R. Cross, Exshaw, Wilson, Walker, Jenkin, Bu... | Epistolary fiction, English, Early works to 1800 | 1 | 139 | Manifest_LitAndLang1 | 0.007143 | 1780 | 1 |
| 146 | 1125 | 0013800602.xml | 13800602 | T129736 | 1773 | Reason triumphant over fancy; exemplified in the singular adventures of Don Sylvio de Rosalva. .... | Volume 2 | Wieland, Christoph Martin | London : printed for J. Wilkie; S. Leacroft, and C. Heydinger, 1773. | Adventure and adventurers, Early works to 1800, English fiction, 18th century | 1 | 237 | Manifest_LitAndLang1 | 0.004202 | 1770 | 1 |
| 159 | 1187 | 0016400402.xml | 16400402 | T055340 | 1799 | The expedition of Humphry Clinker. By the author of Roder:c [sic] Random. In two volumes. ... Co... | Volume 2 | Smollett, Tobias George | London : printed for C. Cooke, [1799?]. | Tobacco, Fiction | 1 | 231 | Manifest_LitAndLang1 | 0.004310 | 1790 | 1 |
| 163 | 1233 | 0016700602.xml | 16700602 | T066370 | 1775 | He is found at last: or, memoirs of the Beverley family. In two volumes. ... | Volume 2 | Anon | London : printed for F. and J. Noble, at their respective Circulating Libraries, in Holborn, and... | Epistolary fiction, English, Early works to 1800 | 1 | 225 | Manifest_LitAndLang1 | 0.004425 | 1770 | 1 |
| 173 | 1265 | 0017000302.xml | 17000302 | T064733 | 1789 | The stage-Coach. Containing the character of Mr. Manly, and the history of his fellow-travellers... | Volume 2 | Smythies, Miss | Berwick : printed by W. Phorson, MDCCLXXXIX. [1789]. | English fiction, 18th century | 1 | 226 | Manifest_LitAndLang1 | 0.004405 | 1780 | 1 |
| 186 | 1335 | 0018600502.xml | 18600502 | T114124 | 1773 | The prudential lovers, or the history of Harry Harper. In two volumes. ... | Volume 2 | Anon | London : printed for the author, and sold by John Bell, in the Strand, M,DCC,LXXIII. [1773]. | English fiction, 18th century | 1 | 202 | Manifest_LitAndLang1 | 0.004926 | 1770 | 1 |
| 187 | 1338 | 0018700102.xml | 18700102 | T070729 | 1793 | Memoirs of Mary, a novel. By Mrs. Gunning. In five volumes. ... | Volume 2 | Gunning, Mrs. (Susannah) | London : printed for J. Bell, No. 148, Oxford-Street, MDCCXCIII. [1793]. | Epistolary fiction, English, 18th century | 1 | 251 | Manifest_LitAndLang1 | 0.003968 | 1790 | 1 |
| 191 | 1348 | 0018700502.xml | 18700502 | T014778 | 1770 | A sentimental journey through France and Italy. By Mr. Yorick. ... | Volume 2 | Sterne, Laurence | London : printed for T. Becket and P.A. De Hondt, in the Strand, MDCCLXX. [1770]. | France, In literature, Italy, Social life and customs, France, Social life and customs | 1 | 208 | Manifest_LitAndLang1 | 0.004785 | 1770 | 1 |
| 195 | 1383 | 0019100302.xml | 19100302 | T073514 | 1785 | Eugenius: or, anecdotes of the golden vale: an embellished narrative of real facts. ... | Volume 2 | Graves, Richard | London : printed for J. Dodsley, Pall-Mall, M.DCC.LXXXV. [1785]. | English essays, 18th century | 1 | 194 | Manifest_LitAndLang1 | 0.005128 | 1780 | 1 |
| 223 | 5151 | 0020000402.xml | 20000402 | T064188 | 1785 | Sentimental memoirs: by a lady. ... | Volume 2 | Lady | London : printed by H. Trapp, No. 1. Pater-Noster-Row; and sold by Mr. Hookham, Bond-Street, MDC... | Autobiography, Early works to 1800 | 1 | 330 | Manifest_LitAndLang2 | 0.003021 | 1780 | 1 |
| 232 | 1485 | 0020300402.xml | 20300402 | T131171 | 1788 | Blan<c3><a7>ay, par l'auteur du Nouveau voyage sentimental. Premiere Partie. | Volume 2 | Gorgy, Jean-Claude | A Londres [i.e. Paris] : et se trouve <c3><a0> Paris, chez Guillot, Libraire de Monsieur, sue St... | French fiction, 18th century | 1 | 208 | Manifest_LitAndLang1 | 0.004785 | 1780 | 1 |
| 241 | 1523 | 0020600302.xml | 20600302 | T055310 | 1769 | The history and adventures of an atom. In two volumes. ... | Volume 2 | Smollett, Tobias George | London : printed for Robinson and Roberts, MDCCXLIX [1769]. | English fiction | 1 | 190 | Manifest_LitAndLang1 | 0.005236 | 1760 | 1 |
| 251 | 1566 | 0020900402.xml | 20900402 | T066911 | 1756 | The memoirs of a young lady of quality, a platonist. ... | Volume 2 | Anon | London : printed for R. Baldwin, in Pater-Noster-Row, MDCCLVI. [1756]. | English fiction, 18th century | 1 | 340 | Manifest_LitAndLang1 | 0.002933 | 1750 | 1 |
| 269 | 1658 | 0022300102.xml | 22300102 | T068746 | 1771 | The contemplative man, or the history of Christopher Crab, Esq; of North Wales. ... | Volume 2 | Anon | London : printed for J. Whiston, Fleet-Street, MDCCLXXI. [1771]. | English fiction, 18th century | 1 | 250 | Manifest_LitAndLang1 | 0.003984 | 1770 | 1 |
| 273 | 1688 | 0022600102.xml | 22600102 | T014825 | 1779 | The life and opinions of Tristram Shandy, gentleman. With The Life of the Author. ... | Volume 2 | Sterne, Laurence | London : printed for P. Miller, Bookseller in the Strand, M.DCC.LXXIX. [1779]. | Fiction, Authorship, Fiction | 1 | 73 | Manifest_LitAndLang1 | 0.013514 | 1770 | 1 |
| 296 | 1800 | 0023500102.xml | 23500102 | T014766 | 1768 | A sentimental journey through France and Italy. By Mr. Yorick. ... | Volume 2 | Sterne, Laurence | London : printed for T. Becket and P.A. De Hondt, in the Strand, MDCCLXVIII. [1768]. | Italy, In literature, France, In literature, Italy, Social life and customs, France, Social life... | 1 | 208 | Manifest_LitAndLang1 | 0.004785 | 1760 | 1 |
| 303 | 1825 | 0023700302.xml | 23700302 | T057451 | 1793 | The minstrel; or, anecdotes of distinguished personages in the fifteenth century. In three volum... | Volume 2 | Anon | London : printed for Hookham and Carpenter, Bond-Street, M.DCC.XCIII. [1793]. | English fiction, 18th century | 1 | 240 | Manifest_LitAndLang1 | 0.004149 | 1790 | 1 |
| 305 | 1828 | 0023700402.xml | 23700402 | T057438 | 1778 | The example: or the history of Lucy Cleveland. By a young lady. ... | Volume 2 | Young lady | London : printed for Fielding and Walker, No. 20, Pater-Noster-Row, M.DCC.LXXVIII. [1778]. | [Cleveland, Lucy, Miss.,], Biography, Early works to 1800 | 1 | 236 | Manifest_LitAndLang1 | 0.004219 | 1770 | 1 |
| 307 | 1844 | 0023800402.xml | 23800402 | T057351 | 1769 | Pasquin, a New Allegorical Romance on the Times: with the fortifivead, a burlesque poem. Dedicat... | Volume 2 | Machay, Archibald | London : sold by S. Bladon, in Pater-Noster-Row, MDCCLXIX. [1769]. | English fiction, 18th century | 1 | 234 | Manifest_LitAndLang1 | 0.004255 | 1760 | 1 |
| 313 | 1882 | 0024100602.xml | 24100602 | T108371 | 1789 | The self-Tormentor, a novel. In three volumes. ... | Volume 2 | Anon | London : printed for G. and T. Wilkie, St. Paul's Church Yard, MDCCLXXXIX. [1789]. | Epistolary fiction, English, Early works to 1800 | 1 | 264 | Manifest_LitAndLang1 | 0.003774 | 1780 | 1 |
| 317 | 1905 | 0024300402.xml | 24300402 | T070712 | 1769 | The history of the adventures of Arthur O'Bradley. In two volumes. ... | Volume 2 | Potter, John | London : printed for T. Becket and P.A. de Hondt, in the Strand, MDCCLXIX. [1769]. | Adventure and adventurers, Early works to 1800 | 1 | 263 | Manifest_LitAndLang1 | 0.003788 | 1760 | 1 |
| 319 | 5158 | 0026900202.xml | 26900202 | T107041 | 1782 | The philosophical Quixote; or, memoirs of Mr. David Wilkins. In a series of letters ... | Volume 2 | Anon | London : printed for J. Johnson, in St. Paul's Church-Yard, MDCCLXXXII. [1782]. | Wilkins, David,, 1685-1745, Early works to 1800, Satire, English, Early works to 1800 | 1 | 165 | Manifest_LitAndLang2 | 0.006024 | 1780 | 1 |
| 328 | 1955 | 0027600602.xml | 27600602 | T094266 | 1799 | St. Leon: a tale of the sixteenth century. By William Godwin. In four volumes. ... | Volume 2 | Godwin, William | London : printed for G. G. and J. Robinson, Paternoster-Row. 1799. R. Noble, printer, Great Shir... | Aristocracy (Social class), France, Fiction | 1 | 331 | Manifest_LitAndLang1 | 0.003012 | 1790 | 1 |
| 337 | 1993 | 0028000202.xml | 28000202 | T014812 | 1760 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 2 | Sterne, Laurence | London : printed for R. and J. Dodsley in Pall-Mall, M.DCC.LX. [1760]. | Fiction, Authorship, Fiction | 1 | 182 | Manifest_LitAndLang1 | 0.005464 | 1760 | 1 |
| 340 | 2005 | 0028400402.xml | 28400402 | T066366 | 1767 | The female American; or, the adventures of Unca Eliza Winkfield. Compiled by herself. In two vol... | Volume 2 | Winkfield, Unca Eliza | London : printed for Francis Noble, at his Circulating Library, opposite Gray's-Inn Gate, Holbou... | Indians of North America, Fiction | 1 | 183 | Manifest_LitAndLang1 | 0.005435 | 1760 | 1 |
| 345 | 2015 | 0028500302.xml | 28500302 | T089903 | 1780 | The history of Tom Jones, a foundling. By Henry Fielding, Esq; ... | Volume 2 | Fielding, Henry | Paris : printed by Fr. Amb. Didot the eldest, and sold by J. N. Pissot, and Barrois junior, Quai... | English fiction | 1 | 386 | Manifest_LitAndLang1 | 0.002584 | 1780 | 1 |
| 361 | 2075 | 0029000602.xml | 29000602 | T089897 | 1795 | The history of Joseph Andrews, and his friend Mr. Abraham Adams. Written in Imitation of the Man... | Volume 2 | Fielding, Henry | London : printed for C. Cooke, No. 17, Paternoster-Row, And sold by all the Booksellers in Great... | Domestics, Fiction, Early works to 1800, Male friendship, Fiction, Early works to 1800, England,... | 1 | 170 | Manifest_LitAndLang1 | 0.005848 | 1790 | 1 |
| 364 | 2106 | 0032600102.xml | 32600102 | T080601 | 1772 | The fine lady a novel by the author of Miss Melmoth. In two volumes. ... | Volume 2 | Briscoe, Sophia | London : printed for T. Lowndes, No. 77, in Fleet-Street, MDCCLXXII. [1772]. | Epistolary fiction, English, Early works to 1800 | 1 | 272 | Manifest_LitAndLang1 | 0.003663 | 1770 | 1 |
| 377 | 2160 | 0033000402.xml | 33000402 | T064911 | 1796 | Travels before the flood. An interesting Oriental Record of men and manners in the antidiluvian ... | Volume 2 | Klinger, Friedrich Maximilian | London : printed for G:g: & J: Robinson Paternoster Row, 1796. | God, Fatherhood, Early works to 1800, History, Ancient, Early works to 1800 | 1 | 226 | Manifest_LitAndLang1 | 0.004405 | 1790 | 1 |
| 378 | 2166 | 0033100202.xml | 33100202 | T090856 | 1795 | Moral tales, by M. Marmontel. Translated from the French. In two volumes. Vol. I. Cooke's editio... | Volume 2 | Marmontel, Jean-Fran<c3><a7>ois | London : printed for C. Cooke, No. 17, Paternoster-Row, And sold by all the Booksellers in Great... | Didactic fiction, French, Translations into English, Early works to 1800 | 1 | 252 | Manifest_LitAndLang1 | 0.003953 | 1790 | 1 |
| 389 | 2207 | 0033400502.xml | 33400502 | T067640 | 1765 | The parasite. ... | Volume 2 | Anon | Dublin : printed for P. Wilson, J. Exshaw, S. Cotter, H. Saunders, E. Watts, J. Potts, and J. Wi... | English fiction | 1 | 114 | Manifest_LitAndLang1 | 0.008696 | 1760 | 1 |
| 399 | 2236 | 0036500302.xml | 36500302 | T094332 | 1799 | The false friend: a domestic story. By Mary Robinson, Author of Poems, Walsingham, Angelina, Hub... | Volume 2 | Robinson, Mary | London : printed for T. N. Longman and O. Rees, Paternoster-Row, 1799. | Epistolary fiction, English, Early works to 1800 | 1 | 351 | Manifest_LitAndLang1 | 0.002841 | 1790 | 1 |
| 402 | 2240 | 0036500402.xml | 36500402 | T068056 | 1750 | The adventures of Mr. Loveill, interspers'd with many real amours of the modern polite world. ... | Volume 2 | Anon | London : printed for M. Cooper, at the Globe in Pater-Noster Row, M.DCC.L. [1750]. | English literature, 18th century | 1 | 339 | Manifest_LitAndLang1 | 0.002941 | 1750 | 1 |
| 408 | 2260 | 0036700502.xml | 36700502 | T108636 | 1794 | Edward De Courcy, an ancient fragment. In two volumes. ... | Volume 2 | Anon | London : printed for William Lane, at the Minerva Press, Leadenhall-Street, M.DCC.XCIV. [1794]. | English fiction, 18th century | 1 | 194 | Manifest_LitAndLang1 | 0.005128 | 1790 | 1 |
| 413 | 2283 | 0037000302.xml | 37000302 | T062265 | 1765 | The generous Briton; or, the authentic memoirs of William Goldsmith, Esq; In two volumes. ... | Volume 2 | Kimber, Edward | London : printed for C. Henderson, under the Royal Exchange, MDCCLXV. [1765]. | English fiction, 18th century | 1 | 340 | Manifest_LitAndLang1 | 0.002933 | 1760 | 1 |
| 426 | 2322 | 0049600302.xml | 49600302 | T055331 | 1781 | The expedition of Humphry Clinker. By the author of Roderick Random. In two volumes. ... | Volume 2 | Smollett, Tobias George | Dublin : printed for J. Exshaw, M. Hay, W. Sleater, D. Chamberlain, J. Potts, J. Holy, and J. Wi... | Tobacco, Fiction | 1 | 244 | Manifest_LitAndLang1 | 0.004082 | 1780 | 1 |
| 432 | 2382 | 0055500302.xml | 55500302 | T094635 | 1768 | The history of Emily Willis, a natural daughter. In two volumes. ... | Volume 2 | Anon | London : printed for F. Noble, at his Circulating Library, near Middle-Row Holborn; and J. Noble... | English fiction, 18th century | 1 | 221 | Manifest_LitAndLang1 | 0.004505 | 1760 | 1 |
| 440 | 2422 | 0073300202.xml | 73300202 | T072290 | 1790 | The life and adventures of Robinson Crusoe, of York, mariner: who lived eight and twenty years a... | Volume 2 | Defoe, Daniel | London : printed for W. Lane, Leadenhall-Street, MDCCXC. [1790]. | Castaways, Fiction, Early works to 1800, Crusoe, Robinson (Fictitious character), Fiction, Early... | 1 | 284 | Manifest_LitAndLang1 | 0.003509 | 1790 | 1 |
| 444 | 28 | 0098900302.xml | 98900302 | N000718 | 1737 | Histoire du vaillant chevalier Tiran le Blanc, traduite de l'espagnol. ... | Volume 2 | Martorell, Joanot | A Londres [i.e. Paris] : aux d<c3><a9>pens de la compagnie, M.DCC.XXXVII. [1737]. | Spanish fiction, 18th century, Translations into French | 1 | 384 | Manifest_HistAndGeo | 0.002597 | 1730 | 1 |
| 447 | 8529 | 0147502802.xml | 147502802 | N014289 | 1798 | Theopha; or, memoirs of a Greek slave; as related by her lover, envoy from the court of France t... | Volume 2 | Anon | London : printed by S. Low, Berwick-Street, Soho, 1798. | Slaves, Greece, Fiction | 1 | 258 | Manifest_SSAndFineArt | 0.003861 | 1790 | 1 |
| 456 | 66 | 0237100902.xml | 237100902 | T100449 | 1776 | The history of Lady Sophia Sternheim. Attempted from the German of Mr. Wieland..... | Volume 2 | La Roche, Sophie von | London : printed for Mr. Joseph Collyer, and sold by T. Jones at clifford's inn-gate,..., MDCCLX... | Women, Germany, Fiction, Early works to 1800, Germany, Social conditions, 18th century, Fiction | 1 | 217 | Manifest_HistAndGeo | 0.004587 | 1770 | 1 |
| 460 | 5204 | 0244900602.xml | 244900602 | T106165 | 1788 | The correspondence of two lovers, inhabitants of Lyons. Published from the French originals. In ... | Volume 2 | Anon | London : sold by T. Hookham, New Bond Street; and G. G. J. and J. Robinson, Paternoster Row. M.D... | English letters, Early works to 1800 | 1 | 177 | Manifest_LitAndLang2 | 0.005618 | 1780 | 1 |
| 463 | 73 | 0248600602.xml | 248600602 | N001522 | 1786 | History of the Honourable Edward Mortimer. By a lady. ... | Volume 2 | Gwynn, Albinia | Dublin : printed for Messrs. White, Byrne, Parker, and Cash, MDCCLXXXVI. [1786]. | English fiction, 18th century | 1 | 124 | Manifest_HistAndGeo | 0.008000 | 1780 | 1 |
| 472 | 5235 | 0272700202.xml | 272700202 | T010033 | 1781 | The history of the adventures of Joseph Andrews, and of his friend Mr. Abraham Adams. Written in... | Volume 2 | Fielding, Henry | London : printed for Harrison and Co. No. 18, Paternoster-Row, MDCCLXXXI. [1781]. | Tobacco, Fiction | 1 | 86 | Manifest_LitAndLang2 | 0.011494 | 1780 | 1 |
| 481 | 8424 | 0298100202.xml | 298100202 | T163094 | 1794 | The citizen of the world, or letters from a Chinese philosopher, residing in London, to his frie... | Volume 2 | Goldsmith, Oliver | London : printed for J. Parsons, No. 21, Paternoster-Row, 1794. | Epistolary fiction | 1 | 202 | Manifest_RelandPhil | 0.004926 | 1790 | 1 |
| 489 | 5289 | 0326900902.xml | 326900902 | T134536 | 1775 | Letters on the improvement of the mind, addressed to a young lady. In two volumes. By Mrs. Chapone. | Volume 2 | Chapone, Mrs. (Hester) | London : printed for J. Walter; and E. and C. Dilly, 1775. | Conduct of life, Early works to 1900, Women, Education, Conduct of life | 1 | 233 | Manifest_LitAndLang2 | 0.004274 | 1770 | 1 |
| 490 | 5290 | 0327000102.xml | 327000102 | T122381 | 1764 | L'espion chinois: ou, L'Envoy<c3><a9> secret de la cour de Pekin, pour examiner L'<c3><a9>tat Pr... | Volume 2 | Goudar, Ange | A Cologne [i.e. London] : [s.n.], MDCCLXIV. [1764]. | Civilization, Oriental, Early works to 1800, Europe, Civilization, 18th century | 1 | 311 | Manifest_LitAndLang2 | 0.003205 | 1760 | 1 |
| 500 | 5348 | 0335900702.xml | 335900702 | T168502 | 1775 | Lettres a une princesse d'allemagne sur divers sujets de physique et de philosophie. ... | Volume 2 | Euler, Leonhard | Londres [i.e. The Hague] : chez la Soci<c3><a9>t<c3><a9> Typographique, M.DCC.LXX.V. [1775]. | Science, Physics | 1 | 325 | Manifest_LitAndLang2 | 0.003067 | 1770 | 1 |
| 522 | 5458 | 0351701502.xml | 351701502 | N002257 | 1795 | Elisa Powell, or trials of sensibility: a series of original letters, collected by a Welsh curat... | Volume 2 | Davies, Edward | London : printed for G. G. and J. Robinson, Pater Noster Row, 1795. | Epistolary fiction, English, Early works to 1800 | 1 | 302 | Manifest_LitAndLang2 | 0.003300 | 1790 | 1 |
| 523 | 5477 | 0352100902.xml | 352100902 | N015275 | 1768 | The busy-Body; or, the adventures of Monsieur Bigand; a man infinitely inquisitive and enterpriz... | Volume 2 | Mouhy, Chevalier de | Dublin : printed for James Williams, in Skinner-Row, MDCCLXVIII. [1768]. | [Bigand, Monsieur], Early works to 1800, Adventure and adventurers, Ireland, Early works to 1800 | 1 | 249 | Manifest_LitAndLang2 | 0.004000 | 1760 | 1 |
| 540 | 5552 | 0363001102.xml | 363001102 | T065259 | 1743 | The life of Marianne: or, the adventures of the Countess of *** By M. de Marivaux. Translated fr... | Volume 2 | Marivaux, Pierre Carlet de Chamblain de | London : printed for Charles Davis against Grays-Inn in Holbourn; and Paul Vaillant in the Stran... | French fiction, 18th century, Translations into English | 1 | 276 | Manifest_LitAndLang2 | 0.003610 | 1740 | 1 |
| 554 | 5628 | 0366400302.xml | 366400302 | T120446 | 1800 | St. Leon: a tale of the sixteenth century. By William Godwin. In four volumes. | Volume 2 | Godwin, William | London : printed for G. G. and J. Robinson, by J. Cundee, 1800. | Aristocracy (Social class), France, Fiction | 1 | 331 | Manifest_LitAndLang2 | 0.003012 | 1800 | 1 |
| 566 | 5681 | 0373800302.xml | 373800302 | T014749 | 1776 | Letters from Yorick to Eliza and Sterne's letters to his friends on various occasions. To which ... | Volume 2 | Sterne, Laurence | Dublin : printed by R. Steuart, sold by the booksellers in Dublin, and Cork, M.DCC,LXXVI. [1776]. | English letters, Novelists, English, 18th century, Correspondence | 1 | 86 | Manifest_LitAndLang2 | 0.011494 | 1770 | 1 |
| 567 | 5698 | 0375900202.xml | 375900202 | T134367 | 1784 | Evelina; or, a young lady's entrance into the world. In two volumes. ... | Volume 2 | Burney, Fanny | Dublin : printed for Messrs. Price, Corcoran, Fitzsimons, Whitestone, Chamberlaine [and 10 other... | Epistolary fiction, English, Early works to 1800 | 1 | 292 | Manifest_LitAndLang2 | 0.003413 | 1780 | 1 |
| 583 | 5758 | 0377200102.xml | 377200102 | T089167 | 1721 | The works of the Right Honourable Joseph Addison, Esq; In four volumes. ... | Volume 2 | Addison, Joseph | London : printed for Jacob Tonson, at Shakespear's-Head, over-against Katharine-Street in the St... | English literature | 1 | 586 | Manifest_LitAndLang2 | 0.001704 | 1720 | 1 |
| 599 | 5855 | 0387400202.xml | 387400202 | T140061 | 1799 | Rosella, or modern occurrences. A novel. In four volumes. By Mary Charlton, ... | Volume 2 | Charlton, Mary | London : printed at the Minerva-Press, for William Lane, 1799. | English fiction | 1 | 297 | Manifest_LitAndLang2 | 0.003356 | 1790 | 1 |
| 603 | 5863 | 0387500902.xml | 387500902 | T014717 | 1760 | The life and opinions of Tristram Shandy, gentleman. ... | Volume 2 | Sterne, Laurence | Dublin : printed for D. Chamberlaine, in Smock-Alley, and S. Smith, at Mr. Faulkner's in Essex-S... | Fiction, Authorship, Fiction | 1 | 136 | Manifest_LitAndLang2 | 0.007299 | 1760 | 1 |
| 614 | 5897 | 0402300702.xml | 402300702 | T119862 | 1756 | The history of Margaret of Anjou, queen of England. Translated from the French of the Abb<c3><a9... | Volume 2 | Pr<c3><a9>vost, abb<c3><a9> | Dublin : printed for G. Faulkner, and R. James, 1756. | Margaret,, of Anjou, Queen, consort of Henry VI, King of England,, 1430-1482 | 1 | 182 | Manifest_LitAndLang2 | 0.005464 | 1750 | 1 |
| 616 | 5901 | 0402400302.xml | 402400302 | T119508 | 1796 | Theodore Cyphon: or, the benevolent Jew. A novel. In two volumes. By George Walker, ... | Volume 2 | Walker, George | Dublin : printed for John Rice, 1796. | English fiction, 18th century | 1 | 224 | Manifest_LitAndLang2 | 0.004444 | 1790 | 1 |
| 619 | 8445 | 0416700202.xml | 416700202 | T146761 | 1776 | The citizen of the world: or, letters from a Chinese philosopher, residing in London, to his fri... | Volume 2 | Goldsmith, Oliver | London : printed for R. Whiston, J. Woodfall, T. Baldwin, R. Johnston, and G. Caddel, MDCCLXXVI.... | Epistolary fiction | 1 | 285 | Manifest_RelandPhil | 0.003497 | 1770 | 1 |
| 624 | 113 | 0443200102.xml | 443200102 | T057309 | 1784 | Cecilia, or memoirs of an heiress. By the author of Evelina. The fourth edition. In five volumes... | Volume 2 | Burney, Fanny | London : printed for T. Payne and Son at the Mews-Gate, and T. Cadell in the Strand, MDCCLXXXIV.... | English fiction, 18th century | 1 | 283 | Manifest_HistAndGeo | 0.003521 | 1780 | 1 |
| 626 | 8552 | 0452900302.xml | 452900302 | T130307 | 1761 | Honny soit qui mal y pense, ou histoires des filles c<c3><a9>lebres du XVIIIe siecle. ... | Volume 2 | Desboulmiers, Jean-Augustin-Julien | Londres [i.e. Paris?], 1761. | French fiction, Courtesans, France, Fiction | 1 | 114 | Manifest_SSAndFineArt | 0.008696 | 1760 | 1 |
| 632 | 130 | 0487700202.xml | 487700202 | T101857 | 1785 | Memoirs of a Pythagorean. In which are delineated the manners, customs, genius, and polity of an... | Volume 2 | Thomson, Alexander, M.D. | London : printed for G. G. J. and J. Robinson, Pater-Noster Row, 1785. | Historical fiction, English, Early works to 1800 | 1 | 165 | Manifest_HistAndGeo | 0.006024 | 1780 | 1 |
| 641 | 5964 | 0555400502.xml | 555400502 | N043386 | 1786 | Barham Downs: or memoirs of the Whitaker family. With anecdotes of Lord Winterbottom. A novel. I... | Volume 2 | Bage, Robert | Dublin : printed by S. Colbert, No. 136, Capel-Street, 1786. | English fiction, 18th century | 1 | 235 | Manifest_LitAndLang2 | 0.004237 | 1780 | 1 |
| 642 | 5967 | 0555400702.xml | 555400702 | N038691 | 1742 | Persiles and Sigismunda: a celebrated novel. Intermixed with a great Variety of Delightful Histo... | Volume 2 | Cervantes Saavedra, Miguel de | Dublin : printed by and for Oli. Nelson, at Milton's Head in Skinner-Row, MDCCXLII. [1742]. | English fiction, 18th century | 1 | 271 | Manifest_LitAndLang2 | 0.003676 | 1740 | 1 |
| 663 | 2553 | 0594600802.xml | 594600802 | T127128 | 1732 | The life of Mr. Cleveland natural son of Oliver Cromwell. Written by himself. Giving a particula... | Volume 2 | Pr<c3><a9>vost, abb<c3><a9> | London : printed for N. Prevost in the Strand, MDCCXXXII. [1732]. | French fiction, 18th century, Translations into English | 1 | 283 | Manifest_LitAndLang1 | 0.003521 | 1730 | 1 |
| 664 | 2557 | 0594700302.xml | 594700302 | T142995 | 1719 | The entertaining novels of Mrs. Jane Barker. In two volumes. I. Exilius; or the banish'd Roman. ... | Volume 2 | Barker, Jane | London : printed for A. Bettesworth, in Pater-Noster-Row, and E. Curll, in Fleet-Street, 1719. | English fiction, 18th century | 1 | 211 | Manifest_LitAndLang1 | 0.004717 | 1710 | 1 |
| 684 | 2650 | 0614900202.xml | 614900202 | T097975 | 1778 | The adventurer. ... . On vent'rous wing in quest of praise I go, And leave the gazing multitude ... | Volume 2 | Hawkesworth, John | London : printed for W. Strahan, J. Rivington and Sons, J. Dodsley, T. Longman, B. Law, T. Caslo... | English essays, 18th century, Newspapers | 1 | 296 | Manifest_LitAndLang1 | 0.003367 | 1770 | 1 |
| 690 | 2674 | 0615500102.xml | 615500102 | T119309 | 1796 | The children of the abbey, a tale. In four volumes. By Regina Maria Roche. ... | Volume 2 | Roche, Regina Maria | London : printed for William Lane, at the Minerva-Press, 1796. | English fiction, 18th century | 1 | 344 | Manifest_LitAndLang1 | 0.002899 | 1790 | 1 |
| 700 | 2713 | 0622000302.xml | 622000302 | T014754 | 1769 | A sentimental journey through France and Italy. By Mr. Yorick. ... | Volume 2 | Sterne, Laurence | Dublin : printed for G. Faulkner, W. and W. Smith, J. Hoey, Sen. P. and W. Wilson, J. Exshaw, H.... | France, In literature | 1 | 127 | Manifest_LitAndLang1 | 0.007812 | 1760 | 1 |
| 705 | 2739 | 0625100202.xml | 625100202 | T118966 | 1765 | The virtuous orphan; or the life, misfortunes, and adventures, of Indiana. Written by herself. I... | Volume 2 | Marivaux, Pierre Carlet de Chamblain de | London : printed for L. Hawes, W. Clarke, and R. Collins, at the Red-Lion in Pater-Noster-Row, M... | Marianne, Early works to 1800, French fiction, 18th century, Translations into English | 1 | 298 | Manifest_LitAndLang1 | 0.003344 | 1760 | 1 |
| 713 | 2775 | 0633000602.xml | 633000602 | T014805 | 1775 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 2 | Sterne, Laurence | London : printed for J. Dodsley in Pall-Mall, MDCCLXXV. [1775]. | Fiction, Authorship, Fiction | 1 | 236 | Manifest_LitAndLang1 | 0.004219 | 1770 | 1 |
| 730 | 2876 | 0646700202.xml | 646700202 | T123810 | 1779 | Persian tales designed for use and entertainment. ... | Volume 2 | Facius, J. F. (Johann Friedrich) | Coburg : printed by R.A.W. Ahl, MDCCLXXIX. [1779]. | Fairy tales, Early works to 1800 | 1 | 156 | Manifest_LitAndLang1 | 0.006369 | 1770 | 1 |
| 731 | 2888 | 0647100102.xml | 647100102 | T029277 | 1795 | Julia de Roubign<c3><a9>, a tale. In a series of letters. Published by the author of The man of ... | Volume 2 | Mackenzie, Henry | London : printed for A. Strahan, and T. Cadell; and W. Creech at Edinburgh: and sold by T. Cadel... | Fathers and daughters, Fiction, Early works to 1800, Married women, Fiction, Early works to 1800... | 1 | 195 | Manifest_LitAndLang1 | 0.005102 | 1790 | 1 |
| 732 | 2893 | 0647100602.xml | 647100602 | T125276 | 1705 | The history of the renowned Don Quixote de la Mancha. Written in Spanish, by Miguel de Cervantes... | Volume 2 | Cervantes Saavedra, Miguel de | London : printed for Sam. Buckley, 1705-06. | English fiction, 18th century | 1 | 309 | Manifest_LitAndLang1 | 0.003226 | 1700 | 1 |
| 734 | 2907 | 0653100502.xml | 653100502 | T086147 | 1772 | The trial: or, the history of Charles Horton, Esq. By a gentleman. In two volumes. ... | Volume 2 | Gentleman | Dublin : printed for H. Saunders, W. Sleater, J. Potts, J. Williams, T. Walker, R. Moncrieffe, a... | [Horton, Charles], Biography, Early works to 1800, Epistolary fiction, English, Early works to 1800 | 1 | 197 | Manifest_LitAndLang1 | 0.005051 | 1770 | 1 |
| 762 | 3045 | 0681400102.xml | 681400102 | T139666 | 1798 | The Sicilian. A novel. In four volumes. By the author of The mysterious wife. ... | Volume 2 | Meeke, Mary | London : printed at the Minerva Press, for William Lane, 1798. | English fiction, 18th century | 1 | 348 | Manifest_LitAndLang1 | 0.002865 | 1790 | 1 |
| 788 | 3181 | 0703000502.xml | 703000502 | T099736 | 1791 | Poems by Mrs. M. Robinson. | Volume 2 | Robinson, Mary | London : printed by J. Bell, British-Library, Strand, Bookseller to His Royal Highness the Princ... | Odes, English, Early works to 1800 | 1 | 233 | Manifest_LitAndLang1 | 0.004274 | 1790 | 1 |
| 797 | 3236 | 0713700602.xml | 713700602 | T073527 | 1794 | Memoirs of Mary, a novel. By Mrs. Gunning. In five volumes. ... | Volume 2 | Gunning, Mrs. (Susannah) | London : printed for J. Bell, No. 148, Oxford-Street, MDCCXCIV. [1794]. | Epistolary fiction, English, 18th century | 1 | 254 | Manifest_LitAndLang1 | 0.003922 | 1790 | 1 |
| 811 | 226 | 0726700202.xml | 726700202 | T082661 | 1755 | The history of Will Ramble, a libertine. Compiled from genuine materials, and the several incide... | Volume 2 | Anon | London : printed for the author: and sold by G. Woodfall, at the King's-Arms, Charing-Cross, M.D... | Historical fiction, English, Early works to 1800 | 1 | 332 | Manifest_HistAndGeo | 0.003003 | 1750 | 1 |
| 879 | 3762 | 0801300202.xml | 801300202 | T135927 | 1756 | The gray's-Inn journal. By Mr. Murphy. | Volume 2 | Murphy, Arthur | Dublin : printed by William Sleater, Bookseller, at Pope's Head on Cork-Hill, [1756]. | Spectator (London, England : 1711), Theater, Great Britain, Periodicals, Great Britain, Social l... | 1 | 310 | Manifest_LitAndLang1 | 0.003215 | 1750 | 1 |
| 880 | 3771 | 0803800802.xml | 803800802 | T163561 | 1754 | The adventurer. ... | Volume 2 | Hawkesworth, John | Dublin : printed for G. and A. Ewing, at the Angel and Bible in Dame-Street, M.DCC.LIV. [1754]. | English essays, 18th century, Newspapers | 1 | 376 | Manifest_LitAndLang1 | 0.002653 | 1750 | 1 |
| 881 | 3774 | 0804000302.xml | 804000302 | T135689 | 1737 | The history and adventures of Gil Blas of Santillane. In three volumes. ... | Volume 2 | Le Sage, Alain Ren<c3><a9> | London : printed for J. and R. Tonson, 1737. | Shelley, Mary Wollstonecraft,, 1797-1851, Books and reading | 1 | 311 | Manifest_LitAndLang1 | 0.003205 | 1730 | 1 |
| 884 | 255 | 0806100202.xml | 806100202 | N006364 | 1752 | The female Quixote: or, the adventures of Arabella. In two volumes. ... | Volume 2 | Lennox, Charlotte | Dublin : printed for J. Smith, at the Philosophers-Heads on the Blind-Quay, MDCCLII. [1752]. | English fiction, 18th century, Women, Fiction, Early works to 1800, England, Social life and cus... | 1 | 255 | Manifest_HistAndGeo | 0.003906 | 1750 | 1 |
| 911 | 3881 | 0831600402.xml | 831600402 | T057811 | 1765 | Nutrebian tales, Or the strange and surprising adventures of a captive Queen, Wonderful Delivera... | Volume 2 | Anon | London : printed for R. Dodsley, Pall-Mall, M,DCC,LXV. [1765]. | English fiction, 18th century | 1 | 256 | Manifest_LitAndLang1 | 0.003891 | 1760 | 1 |
| 917 | 3906 | 0832300202.xml | 832300202 | T014740 | 1760 | The life and opinions of Tristram Shandy, gentleman. ... | Volume 2 | Sterne, Laurence | London : printed for D. Lynch, MDCCLX. [1760]-67 [1771?]. | Fiction, Authorship, Fiction | 1 | 135 | Manifest_LitAndLang1 | 0.007353 | 1760 | 1 |
| 925 | 3942 | 0836400102.xml | 836400102 | T065784 | 1773 | Letters on the improvement of the mind, addressed to a young lady. In two volumes. ... | Volume 2 | Chapone, Mrs. (Hester) | London : printed by H. Hughs, for J. Walter, Homer's Head, Charing-Cross, MDCCLXXIII. [1773]. | Women, Education, Conduct of life, Early works to 1900 | 1 | 234 | Manifest_LitAndLang1 | 0.004255 | 1770 | 1 |
| 929 | 3952 | 0842600102.xml | 842600102 | T089166 | 1761 | The works of the late Right Honorable Joseph Addison, Esq; ... . With a Complete Index. | Volume 2 | Addison, Joseph | Birmingham : printed by John Baskerville, for J. and R. Tonson, At Shakespear's Head in the Stra... | Addison, Joseph,, 1672-1719, Early works to 1800, English literature, 18th century | 1 | 564 | Manifest_LitAndLang1 | 0.001770 | 1760 | 1 |
| 959 | 4098 | 0865000102.xml | 865000102 | T062061 | 1797 | The italian, or, the confessional of the black penitents. A romance. By Ann Radcliffe, author of... | Volume 2 | Radcliffe, Ann Ward | Dublin : printed for P. Wogan, P. Byrne, J. Exshaw, T. Stewart, J. Milliken, J. Moore, J. Rice, ... | Naples (Italy), Fiction, Early works to 1800 | 1 | 350 | Manifest_LitAndLang1 | 0.002849 | 1790 | 1 |
| 987 | 4251 | 0881400202.xml | 881400202 | N025043 | 1783 | The woman of letters; or, the history of Miss Fanny Belton. In two volumes. ... | Volume 2 | Smyth, Maria | London : printed for Francis Noble, at his Circulating Library, opposite Gray's-Inn Gate, Holbor... | Epistolary fiction, English, Early works to 1800 | 1 | 292 | Manifest_LitAndLang1 | 0.003413 | 1780 | 1 |
| 1009 | 4341 | 0889900202.xml | 889900202 | T131116 | 1769 | The mistakes of the heart: or, memoirs of Lady Carolina Pelham and Lady Victoria Nevil. In a ser... | Volume 2 | Treyssac de Vergy, Pierre Henri | London : printed for J. Murdoch in the Strand, MDCCLXIX. [1769]. | Epistolary fiction, English, Early works to 1800 | 1 | 223 | Manifest_LitAndLang1 | 0.004464 | 1760 | 1 |
| 1028 | 6104 | 0898000102.xml | 898000102 | T134727 | 1790 | Histoire de Gil Blas de Santillane. Par M. Le Sage. | Volume 2 | Le Sage, Alain Ren<c3><a9> | A Londres : chez J.F. & C. Rivington, T. Longman, B. Law, G.G.J. & J. Robinson, & F. Wingrave, s... | French fiction | 1 | 286 | Manifest_LitAndLang2 | 0.003484 | 1790 | 1 |
| 1033 | 4411 | 0901400102.xml | 901400102 | T117802 | 1764 | The tatler; or, lucubrations of Isaac Bickerstaff, Esq; ... | Volume 2 | Steele, Richard, Sir | London : printed for J. and R. Tonson, J. Buckland, H. Woodfall, J. Rivington, J. Hinton, R. Bal... | Great Britain, Intellectual life, Periodicals | 1 | 352 | Manifest_LitAndLang1 | 0.002833 | 1760 | 1 |
| 1073 | 4626 | 0937800802.xml | 937800802 | N008565 | 1756 | The devil upon crutches in England, or night scenes in London. A satirical work. Written upon th... | Volume 2 | Gentleman of Oxford | London : printed for Philip Hodges, at the Globe in Great Turnstile, Holborn, 1756. | Fiction, 18th century | 1 | 81 | Manifest_LitAndLang1 | 0.012195 | 1750 | 1 |
| 1094 | 6194 | 0971500202.xml | 971500202 | T177358 | 1788 | The reflector. A selection of Essays on Various Subjects of Common Life. From original papers. I... | Volume 2 | Anon | London : printed for W. Lane, Leadenhall-Street, MDCCLXXXVIII. [1788]. | Conduct of life, Early works to 1800 | 1 | 216 | Manifest_LitAndLang2 | 0.004608 | 1780 | 1 |
| 1107 | 4839 | 0986100202.xml | 986100202 | T118232 | 1742 | The skimmer: or the history of Tanzai and Neadarne. ... | Volume 2 | Cr<c3><a9>billon, Claude Prosper Jolyot de | London : printed for F. Galicke near Temple-Bar, M.DCC.XLII. [1742] | English fiction, 18th century | 1 | 139 | Manifest_LitAndLang1 | 0.007143 | 1740 | 1 |
| 1118 | 4902 | 0998800702.xml | 998800702 | T211729 | 1792 | It is, and it is not a novel. In two volumes. By Charlotte Palmer. ... | Volume 2 | Palmer, Charlotte | London : printed for Hookham and Carpenter, Old and New Bond-Street, 1792. | Epistolary fiction, English, Early works to 1800 | 1 | 383 | Manifest_LitAndLang1 | 0.002604 | 1790 | 1 |
| 1133 | 5013 | 1010301302.xml | 1010301302 | T200769 | 1773 | The life and opinions of Tristram Shandy, gentleman. |... | Volume 2 | Sterne, Laurence | Dublin : printed by Henry Saunders, in Great Ship-Street, near Stephen-Street, MDCCLXXIII. [1773]. | Fiction, Authorship, Fiction | 1 | 65 | Manifest_LitAndLang1 | 0.015152 | 1770 | 1 |
| 1147 | 5067 | 1014200202.xml | 1014200202 | T185980 | 1775 | The expedition of Humphry Clinker. By the author of Roderick Random. In two volumes. ... | Volume 2 | Smollett, Tobias George | Dublin : printed by the United Company of Book-Sellers, 1775. | Tobacco, Fiction | 1 | 224 | Manifest_LitAndLang1 | 0.004444 | 1770 | 1 |
| 1186 | 6534 | 1042300302.xml | 1042300302 | T222948 | 1774 | Yorick's sentimental journey, continued. To which is prefixed, some account of the life and writ... | Volume 2 | Hall-Stevenson, John | London : printed for J. Bew, in Paternoster-Row, MDCCLXXIV. [1774]. | Sterne, Laurence,, 1713-1768 | 1 | 178 | Manifest_LitAndLang2 | 0.005587 | 1770 | 1 |
| 1204 | 6623 | 1045600202.xml | 1045600202 | T178561 | 1774 | The tatler; or, lucubrations of Isaac Bickerstaff, Esq; ... | Volume 2 | Steele, Richard, Sir | London : printed for C. Bathurst, J. Buckland, W. Strahan, J. and F. Rivington, C. Say, J. Hinto... | English essays | 1 | 352 | Manifest_LitAndLang2 | 0.002833 | 1770 | 1 |
| 1219 | 6724 | 1056600202.xml | 1056600202 | T181013 | 1800 | The three Spaniards, a romance. By George Walker, author of the vagabond, &c. In three volumes. ... | Volume 2 | Walker, George | London : printed by Sampson Low, for G. Walker, No. 106, Great Portland Street; and Hurst, No. 3... | English fiction | 1 | 264 | Manifest_LitAndLang2 | 0.003774 | 1800 | 1 |
| 1225 | 6803 | 1062400102.xml | 1062400102 | T059508 | 1719 | The history of the renowned Don Quixote de la Mancha. In Four Volumes. Written in Spanish by Mig... | Volume 2 | Cervantes Saavedra, Miguel de | London : printed for R. Knaplock, D. Midwinter, J. Tonson, and W. Churchill; and are to be sold ... | English fiction, 18th century | 1 | 313 | Manifest_LitAndLang2 | 0.003185 | 1710 | 1 |
| 1255 | 6979 | 1099000202.xml | 1099000202 | N012822 | 1784 | The ring, a novel: In a Series of Letters. By a Young Lady. In Three Volumes. ... | Volume 2 | Young Lady | Dublin : printed for S. Price, W. & H. Whitestone, R. Moncrieffe, T. Walker, G. Burnet, J. Exsha... | Biographical fiction, Early works to 1800 | 1 | 126 | Manifest_LitAndLang2 | 0.007874 | 1780 | 1 |
| 1256 | 6983 | 1101200302.xml | 1101200302 | T094333 | 1799 | The false friend: a domestic story. By Mary Robinson, Author of Poems, Walsingham, Angelina, Hub... | Volume 2 | Robinson, Mary | London : printed for T.N. Longman and O. Rees, Paternoster-Row, 1799. | Epistolary fiction, English, Early works to 1800 | 1 | 345 | Manifest_LitAndLang2 | 0.002890 | 1790 | 1 |
| 1274 | 7097 | 1111400102.xml | 1111400102 | N018960 | 1790 | Letters on the manners of the French, and on the follies and extravagancies of the times. Writte... | Volume 2 | Caraccioli, Louis Antoine, marquis | London : printed for G. G. J. and J. Robinson , Pater-Noster-Row ; and W. Keymer, Colchester, M.... | France, Social life and customs, 18th century | 1 | 284 | Manifest_LitAndLang2 | 0.003509 | 1790 | 1 |
| 1285 | 7147 | 1118301802.xml | 1118301802 | T162113 | 1789 | Arundel. By the author of The observer. ... | Volume 2 | Cumberland, Richard | Dublin : printed for Messrs. G. Burnet, R. Moncrieffe, L. White, P. Byrne, P. Wogan, C. Lewis, J... | Epistolary fiction, English, Early works to 1800 | 1 | 300 | Manifest_LitAndLang2 | 0.003322 | 1780 | 1 |
| 1302 | 397 | 1134700502.xml | 1134700502 | N008001 | 1781 | The history of John Juniper, Esq. alias Juniper Jack. Containing the birth, parentage, and educa... | Volume 2 | Johnstone, Charles | Dublin : printed for S. Price, J. Sheppard, R. Cross, T. Wilkinson, W. Gilbert, [and 10 others i... | Juniper, Jack, Early works to 1800, English fiction, 18th century | 1 | 267 | Manifest_HistAndGeo | 0.003731 | 1780 | 1 |
| 1304 | 7276 | 1135900102.xml | 1135900102 | T209625 | 1800 | The persian moonshee. By Francis Gladwin. The third edition. Adapted to the use of the College a... | Volume 2 | Gladwin, Francis | Calcutta : [s.n.], 1800. | Persian language, Grammar, Early works to 1800 | 1 | 313 | Manifest_LitAndLang2 | 0.003185 | 1800 | 1 |
| 1305 | 7279 | 1136000102.xml | 1136000102 | T120865 | 1797 | The beggar girl and her benefactors. In three volumes. By Mrs. Bennett, ... | Volume 2 | Bennett, Mrs. (Agnes Maria) | Dublin : printed by P. Wogan, 1797. | English fiction | 1 | 357 | Manifest_LitAndLang2 | 0.002793 | 1790 | 1 |
| 1317 | 7371 | 1143800302.xml | 1143800302 | N029290 | 1778 | Julia de Roubign<c3><a9>, a tale. In a series of letters. Published by the author of The man of ... | Volume 2 | Mackenzie, Henry | London : printed for W. Strahan; T. Cadell, in the Strand; and W. Creech, at Edinburgh, MDCCLXXV... | Fathers and daughters, Fiction, Early works to 1800, Married women, Fiction, Early works to 1800... | 1 | 207 | Manifest_LitAndLang2 | 0.004808 | 1770 | 1 |
| 1329 | 8591 | 1167201202.xml | 1167201202 | N018907 | 1772 | Letters from Elizabeth Sophia de Valiere to her friend Louisa Hortensia de Canteleu. By Madam Ri... | Volume 2 | Riccoboni, Marie Jeanne de Heurles Laboras de Mezi<c3><a8>res | Dublin : printed for J. Potts, J. Williams, T. Walker, and C. Jenkins, Booksellers, 1772. | Epistolary fiction, 18th century, Translations into English | 1 | 237 | Manifest_SSAndFineArt | 0.004202 | 1770 | 1 |
| 1336 | 7491 | 1181800602.xml | 1181800602 | T014728 | 1774 | A sentimental journey through France and Italy. By Mr. Yorick. ... | Volume 2 | Sterne, Laurence | London : printed for T. Becket, Corner of the Adelphi, in the Strand, MDCCLXXIV. [1774]. | Clergy, Fiction, Travelers, Fiction, British, Italy, Fiction, British, France, Fiction, France, ... | 1 | 212 | Manifest_LitAndLang2 | 0.004695 | 1770 | 1 |
| 1359 | 415 | 1215400502.xml | 1215400502 | N008482 | 1785 | History of the Honourable Edward Mortimer. By a lady. ... | Volume 2 | Gwynn, Albinia | London : printed for C. Dilly, Poultry; G. Wilkie, ST. Paul's Church-Yard; and T. Hookham, Bond-... | English fiction, 18th century | 1 | 173 | Manifest_HistAndGeo | 0.005747 | 1780 | 1 |
| 1367 | 8596 | 1219500502.xml | 1219500502 | N000979 | 1766 | Chrysal: or, the adventures of a guinea. Wherein are exhibited views of several striking scenes,... | Volume 2 | Johnstone, Charles | Dublin : printed for Henry Saunders in Castle-Street, and Hulton Bradley, in Dame-Street, MDCCLX... | English fiction, 18th century, Guinea (Coin), Fiction, Early works to 1800 | 1 | 253 | Manifest_SSAndFineArt | 0.003937 | 1760 | 1 |
| 1369 | 7750 | 1219601202.xml | 1219601202 | T076282 | 1783 | The two mentors: a modern story. By the author of The old English baron. | Volume 2 | Reeve, Clara | Dublin : printed for S. Price, W. and H. Whitestone, W. Colles, W. Gilbert, R. Moncrieffe, T. Wa... | Epistolary fiction, English, Early works to 1800 | 1 | 329 | Manifest_LitAndLang2 | 0.003030 | 1780 | 1 |
| 1372 | 7768 | 1220200602.xml | 1220200602 | T060973 | 1777 | The excursion. In two volumes. By Mrs. Brooke, Author of the History of Lady Julia Mandeville, a... | Volume 2 | Brooke, Frances | Dublin : printed for Messrs. Price, Whitestone, Corcoran, R. Cross, Sleater, Chamberlaine, Potts... | English ficition, 18th century | 1 | 269 | Manifest_LitAndLang2 | 0.003704 | 1770 | 1 |
| 1373 | 7774 | 1220301302.xml | 1220301302 | T014793 | 1775 | A sentimental journey through France and Italy. By Mr. Yorick. ... | Volume 2 | Sterne, Laurence | London : printed for T. Becket, Corner of the Adelphi, in the Strand, MDCCLXXV. [1775]. | France, In literature | 1 | 219 | Manifest_LitAndLang2 | 0.004545 | 1770 | 1 |
| 1375 | 7779 | 1220600102.xml | 1220600102 | T068154 | 1776 | Arabian nights entertainments: consisting of one thousand and one stories, told by the Sultaness... | Volume 2 | Anon | Dublin : printed for W. Whitestone, J. Sheppard, B. Corcoran, J. Potts, R. Cross, W. Watson, D. ... | French fiction, 18th century, Translations into English | 1 | 315 | Manifest_LitAndLang2 | 0.003165 | 1770 | 1 |
| 1384 | 7802 | 1223700602.xml | 1223700602 | T161036 | 1756 | The connoisseur. By Mr. Town, critic and censor-general. ... | Volume 2 | Colman, George | Dublin : printed for George Faulkner, in Essex-Street, MDCCLVI. [1756]. | Art, Periodicals, Early works to 1800, Collectors and collecting, Early works to 1800 | 1 | 319 | Manifest_LitAndLang2 | 0.003125 | 1750 | 1 |
| 1406 | 7920 | 1238400102.xml | 1238400102 | N031674 | 1792 | Frederica: or the memoirs of a young lady. A novel, in two volumes. By a lady. Dedicated to Her ... | Volume 2 | Lady | Dublin : printed for Messrs. P. Wogan, P. Byrne, A. Grueber, W. M'Kenzie, J. Moore, J. Jones, W.... | Epistolary Fiction, English, Early works to 1800 | 1 | 283 | Manifest_LitAndLang2 | 0.003521 | 1790 | 1 |
| 1408 | 7923 | 1238400302.xml | 1238400302 | N035225 | 1764 | The moral tales of M. Marmontel. Translated from the French by C. Denis, and R. Lloyd. | Volume 2 | Marmontel, Jean-Fran<c3><a7>ois | London : printed for G. Kearsly, 1764. | Didactic fiction, French, Translations into English, Early works to 1800 | 1 | 295 | Manifest_LitAndLang2 | 0.003378 | 1760 | 1 |
| 1415 | 7960 | 1240901002.xml | 1240901002 | T060529 | 1745 | Persiles and Sigismunda: a celebrated novel. Intermix'd with a great Variety of Delightful Histo... | Volume 2 | Cervantes Saavedra, Miguel de | London : printed for Thomas Wright, Bookseller, at the Universal Circulating Library, in Exeter ... | Fiction, 18th century | 1 | 293 | Manifest_LitAndLang2 | 0.003401 | 1740 | 1 |
| 1426 | 8027 | 1243900202.xml | 1243900202 | N006226 | 1783 | Emma Corbett. In two volumes. ... | Volume 2 | Pratt, Mr. (Samuel Jackson) | London : printed for R. Baldwin, Pater-Noster-Row, MDCCLXXXIII. [1783]. | United States, History, Revolution, 1775-1783, Fiction | 1 | 258 | Manifest_LitAndLang2 | 0.003861 | 1780 | 1 |
| 1443 | 8200 | 1269300202.xml | 1269300202 | T014747 | 1768 | A sentimental journey through France and Italy. By Mr. Yorick. ... | Volume 2 | Sterne, Laurence | London : printed for T. Becket and P. A. De Hondt, in the Strand, MDCCLXVIII. [1768]. | Italy, In literature, France, In literature, Italy, Social life and customs, France, Social life... | 1 | 217 | Manifest_LitAndLang2 | 0.004587 | 1760 | 1 |
| 1466 | 8283 | 1287900102.xml | 1287900102 | T165661 | 1780 | The history of Tom Jones, a foundling. By Henry Fielding, Esqr. In three volumes. ... | Volume 2 | Fielding, Henry | Edinburgh : printed by and for W. Darling, Advocates Close: for W. Anderson, Bookseller in Stirl... | English fiction | 1 | 350 | Manifest_LitAndLang2 | 0.002849 | 1780 | 1 |
Data source: The 2018 END dataset paratext data: https://github.com/earlynovels/end-dataset/blob/master/end-dataset-master-11282018/11282018-full-paratexts.tsv
# Import the tsv file
END_footnotes_data = pd.read_csv('../11282018-full-paratexts.txt', sep='\t')
END_footnotes_data['type'].value_counts()
['Footnotes'] 271 ['Preface'] 244 ['Table of contents'] 213 ['Errata'] 124 ['Dedication'] 114 ['Advertisement'] 83 ['Introduction'] 69 ['Footnote'] 67 ['To the Reader'] 55 ['Note'] 52 ['Colophon'] 51 ['Poem'] 35 ['Conclusion'] 33 ['Copyright statement'] 32 ['Postscript'] 15 ['Letter'] 15 ['Appendix'] 13 ['About the author'] 12 ['Index'] 11 ['Epilogue'] 9 ['Essay'] 8 ['Hymns'] 8 ['Glossary'] 7 ['Dedication', 'Letter'] 7 ['Other'] 7 ['Endnotes'] 6 ['Official note'] 6 ['Table of contents.'] 6 ['Advertisement', 'To the Reader'] 5 ["Subscribers' list"] 5 ['To the Author'] 5 ['Character information'] 4 ['Notes'] 3 ['Key'] 3 ['Review'] 3 ['Advertisement', 'To the Reader', 'Letter'] 3 ['Apology'] 3 ['Argument'] 3 ['Explanatory Note'] 3 ["Subscriber's List"] 2 ['Printers imprint'] 2 ['List of characters'] 2 ['To the reviewer'] 2 ['Afterword'] 2 ['Footnotes.'] 2 ['Preface', 'First line: ""A preface, my good Sir!"'] 1 ['Conclusion', 'Conclusion, by the curate.'] 1 ['Preface', 'Preface.'] 1 ['Footnote', '""Demopheon ventis et verba er vela dedisti. Ov. Ep."'] 1 ['To the Reviewer'] 1 ['Addenda', 'Addenda by the editor.'] 1 ['Footnote,'] 1 ['To the binder'] 1 ['A novel describing Philadelphia during the yellow fever epidemic in 1793.'] 1 ["Subscribers' List"] 1 ['Copyright Statement'] 1 ['Sermon'] 1 ["Subscriber's list"] 1 ['Licence'] 1 ['Directions'] 1 ['Preface', 'To the Reader'] 1 ['Profit statement'] 1 ['Poem', 'Dedication'] 1 ["Editor's note"] 1 ['Dedication', 'Preface'] 1 ['Introduction', 'About the author'] 1 ['To the Subscriber'] 1 ['Preamble'] 1 ['To the bookseller'] 1 ['Historical note'] 1 ['Sidenote'] 1 ['To the Reader', 'Introduction'] 1 ['Dedication.'] 1 ['Author (paratext)'] 1 Name: type, dtype: int64
#Filter to include just footnote paratexts
END_footnotes_data_just_footnotes = END_footnotes_data[END_footnotes_data['type'].str.contains('Foot').fillna(False)]
print("Location of footnotes within END volumes:")
END_footnotes_data_just_footnotes.position.value_counts()
Location of footnotes within END volumes:
['Middle'] 301 ['Front'] 15 ['Back'] 9 ['Front', 'Middle'] 8 ['Front', 'Middle', 'Back'] 2 ['Middle, page 92'] 1 ['Middle', 'Some provide commentary on narrative; others seem to be indexing tools'] 1 ['Middle', 'Back'] 1 Name: position, dtype: int64
END_footnotes_data_just_footnotes.notes.value_counts()
['pp. 24, 65,'] 3
['Footnotes for v.1 can be found on the following pages: vii, 22 - 23; Footnotes for v.2 can be found on the following pages: 22, 76, 126, 128, 139, 185, 253; Footnotes for v.3 can be found on the following pages: 83, 151, 186, 210, 235, 287; Footnotes for v.4 can be found on the following pages: 26 - 27, 44.'] 2
['Footnotes appear on the following pages: iv, v, xi, xii, xxi-xxii, xxiv, xxvi, xxvii, xxx-xxxi, 45, 46-8, 57-8, 76, 81-9, 91, 93, 94, 102, 105, 106-7, 107-8, 109-10, 121, 130, 137, 139-41, 148, 150-1, 153, 156, 166, 167, 174-5, 176, 177, 178, 179, 181, 182-3, 184-5, 186-7, 189, 192, 199-200, 202-3, 207-8, 211-2, 213, 214, 216-7, 217-8, 223, 224, 229-30;'] 2
['Footnotes are located on the following pages: 100, 119, 126, 178, 185, 191, 225', 'Some footnotes indicate where in the collection the story related by a letter is picked up again'] 2
['A footnote on page 179 in volume 4 describes the fictional dialogues that follow: ""These dialogues were a kind present to the author by a friend."'] 2
['Page ii; Transcription: ""The author of the Seasons."" "'] 2
['A footnote appears in Vol. I on page ii of the dedication.'] 2
['Footnotes are found on the following pages:206'] 2
['A footnote appears on page 89.'] 2
['v.2 footnotes on pages 136, 316; v.3 footnotes on pages 92, 129'] 2
['Footnotes appear on pages 1, 2, 3, 11, 12, 13, 15, 22, 23, 35, 37, 40, 43, 46, 47, 49, 54, 56, 59, 61, 63, 66, 67, 68, 70, 72, 75, 122, 124, 127, 128, 131, 138, 142, 148, 149, 153, 158, 160.'] 2
["Some are attributed to Swift's friend Lord Orrery's 1751 commentary; others are unattributed, and occasionally comment on or correct Orrery's notes (e.g. p. 111, p. 166, p. 183). These notes explain rather than expand the satire; the opposite of the famously vertiginous Swiftian footnote. Footnotes on pages 4 (in Letter from Gulliver to Simpson), [1] (main text), 10, 12, 14, 15, 19, 21, 23, 29, 33, 35, 44, 54, 57, 72, 89, 90, 92, 111, 126, 129, 132, 139, 146, 152, 166, 178, 182, 183, 184, 185, 199, 202, 203, 208, 217, 232, 234-5, 236, 240, 248, 250, 256, 257, 286"] 2
['Descriptive/explanatory footnotes are found occasionally throughout the text'] 2
['Footnotes located on pages: 2, 5, 9, 12, 17, 21, 28, 29, 36, 37, 38, 39, 49, 52, 55, 60, 61, 65, 108, 109, 112, 115, 121, 126, 130, 132, 133, 134, 135, 136, 137, 140, 142, 144, 145, 148, 155, 171, 173, 174, 175, 177, 178, 182-3, 189, 200, 201, 203, 204, 205, 207, 209, 210, 213, 214, 216, 217, 218, 220, 221, 222, 223, 224, 225, 226, 260, 265, 269, 279, 281, 291, 317, 328, 330, 331, 332, 334, 335, 336, 337-8.'] 2
['p.2 and p.20'] 2
['pp. II.41. III.53. III.61. III.72, III.194, III.209, III.240, III.277, III.288'] 2
['Footnote transcription: "Voltaire" pg. 343', 'Footnote accredits quote to Voltaire: " "Mortels! ---voulez-vous tolérer la vie? --- Oubliez, & jouissez*" " pg.343.'] 2
['p. 189: "\'What a fine goose,\' was his saying, \'that lays me so many golden eggs!\'"'] 2
['Footnotes are indicated by an asterisk.', 'Footnote on pg.9 (first page of main text). Transcription of first sentence: "The reader will remember, that the Editor is accountable only for scattered chapters, and fragments of chapters; the curate must answer for the rest."', 'Footnote on pg.67, which carries onto the following page. Transcription of first sentence: "Though the Curate could not remember having shown this chapter to any body, I strongly suspect that these political observations are the work of a later pen than the rest of this performance."'] 2
['Explanatory footnotes'] 2
['On first page of text, an explanatory footnote reads, ""Not Litchfield in England, but Lichtfield, a supposed Prussian title."'] 2
['On pages: 137, 153, 164, 167, 169, 176, 178, 188, 192, 222.'] 2
['Appear frequently through the text'] 2
['Located pp. 44, 72, and 145, v.2; pp. 290-1, v.3; and p. 258, v.4. Example from v.3: ""Note by the editor. Cornelia seems to have alluded here to the following passage in the eloquent Sermins of Massillon: [...]"'] 2
['A footnote which provides an explanatory definition to a word in a selection of verse appears on p. 200, in "The adventures of Sophia."'] 1
['On p. 119. Transcription: "A late writer compares it [\'old-maidism\'] to a blighted tree upon an open common."'] 1
['Footnotes throughout explaining references made in the text.', 'Transcription : This actress declared that she preferred the applauses of Caesar to those of a whole theatre.'] 1
['Page 121 : An account of the events which are here related in regard to three brothers of a noble family in Portugal [...]'] 1
['pg.59 "A coin worth ten shillings."'] 1
['p.87; This severe satire, at first sight, might be construed into a general reflection upon courts and courtiers, and upon those sycophants who have sullied the press by writings calculated to lead the minds of youth to simulation, fraud, deceit, and false appearances of every kind, as the sure means of advancing themselves in the palaces of princes;', 'Note from the translator'] 1
['Attributional footnotes appear on p. 98, 124.', 'Footnotes relating to the world external to the novel appear on p. 138, 187, 298.', 'Footnotes which comment on the inclusion, omission, or ordering of letters or materials in the text appear on p. 218.', 'Footnotes which provide translations or definitions appear on p. 328, 438.'] 1
['Footnotes provide sources for quotations', 'Pg.67 : So admirably described in the exquisite poem of the Task, where he speaks of the alcove [...]'] 1
['Page 142 : The Author humbly hopes that those of his readers whom he has failed to convince by his Introductory Defence, will at least be contented with the opinions, held forth in this Tale by the enraged Smedley. [...]'] 1
['Page 4 : See page 132 and 161 of Political Justice, where the two last contradictory absurdities will be found verbatim.', 'Page 9 : Political Justice.', 'Page 10 : Rousseau.', "Page 122 : For the sake of the English reader, these words may be found in Johnson's Dictionary.", "Page 129 : Godwin's Political Justice, page 86.", 'Page 142 : If is a known fact, that every part of the human body has a regular price. [...]', "Page 152 : Godwin's Political Justice, page 804.", 'Page 177 : It is the practice of the new school to exalt every thing savage.', 'Various footnotes throughout. Godwin is mentioned frequently.'] 1
['A footnote on page 75 of Vol. II cites an author, M. de Marivaux.'] 1
['Pg. 59 : The author here vouches for the truth of this assertion occurring to a lady of her acquaintance.'] 1
['Attributional footnotes appear in Vol. I on p. 4, 206, 207, and in Vol. II on p. 95, 121, 125.', 'Footnotes which provide definitions or translations of text appear in Vol. I on p. 207.', 'Footnotes which reference the world external to the novel appear in Vol. I on p. 209 and in Vol. II on p. 66, 69, 127, 241.', 'Footnotes which extend the fictionality of the work appear in Vol. I on p. 211.', 'Footnotes which provide content directly from other sources appear in Vol. II on p. 47, 48, 69, 225, 238-239.', 'Indexical footnotes appear in Vol. II on p. 137.'] 1
['Footnotes can be found on pages 9, 10-11, 19, 21, 23-4, 28-9, 32, 37, 40-1, 43, 50-2, 61, 64, 79, 94-5, 97, 115, 123-4, 128, 130, 132, 134-5, 141-4, 149, 155, 167-8, 173-6, 180-1, 184-97, 206, 211-2, 214-5, 219-24, 232-3, 247, 249-50, 254, 262, 264, 269-70, 284, 291-2, and 298.'] 1
["Author's footnotes appear on pages 70 and 174."] 1
['A footnote appears on p. 8-9 commenting on the origin of the phrase "rosy neck." An editorial remark occurs at the note\'s conclusion: "If thou shouldst be unlucky enough to ask, gentle reader, to what purpose serves this note? we shall answer thee in the sarcastic way, by asking thee, To what end serve all other notes but to shew the author\'s reading?"', 'A footnote appears on p. 46-47 which begins: "We are apt to suspect, that the terrible fury of the modern race of critics will fall as foul upon our living earth, as upon the rosy neck we have before been at the pains of vindicating from them." Concludes: "For the reason and necessity of this note, see the very learned Mr. Warburton\'s Shakespeare, variis locis."', 'Both footnotes provide word and phrase origin, references to other works, and editorial commentary.'] 1
['On p. 286 of Vol. I, a footnote is denoted with an asterisk; it clarifies information on the world external to the novel ("A Carthusian monastery, which practices the greatest austerities.")', 'On p. 13 in Vol. II, an "(a)" denotes a footnote, which extends the fiction of the novel by offering a plot explanation ("He had only a thousand livres in his hands, because he had remitted Clarissa the four thousand he had told of her.")', 'On p. 145 of Vol. II, a footnote appears with an asterisk which clarifies information on the world external to the novel ("In places near the parliaments, the peasants imagine, that, next to a prince, nobody is so great as a president.")', 'On p. 174 of Vol. II, a footnote is denoted by an asterisk; it clarifies information on the world external to the novel ("Noviciates in monasteries, are places set apart for the receptions of young persons, who intent taking the religous habit.")', 'On p. 182 of Vol. II, a footnote is denoted with an asterisk; it clarifies information on the world external to the novel ("A part of the breviary.")'] 1
['Appears on p. 57 of Vol. I; explains omission of letters from the text.'] 1
["A footnote appearing on page 19 references the author's time as a clerk to John Ashby, Esq."] 1
['Footnotes can be found in Vol. I on p. 22 (attributes a quote to Shakespeare); in Vol. II on p. 49 (indexical), 133 (attributes a quoted line to Mason\'s Elfrida), 166 (attributes a quoted line to Smart); in Vol. IV on p. 167 (two footnotes appear: one clarifies a reference to a figure external to the novel, whereas the second attributes a quoted line to "Life of Crowley"); and in Vol. V on p. 82 (indexical),'] 1
['On p. 354 of Vol. III and p. 312 and 482 in Vol. V, footnotes attribute quoted lines of text to Dryden, Shakespeare and Young.'] 1
['Some footnotes explain historical or legal concepts as well as provide certian definitions. The majority, however, are translations of Greek or Latin text.', 'Footnotes appear on p.12, 19, 28, 30, 48, 256, 257, 259 of Vol. I; p. 258, 290, 291 of Vol. III, and p. 46, 47, 285, 288 of Vol. IV.'] 1
['The footnotes only appear in the endnotes.', 'Footnotes are located on p.216, 224, 235-237, 244, 247, 248, 256-258, 266, 269, 271, 272, 273, 274, 276, 277, 294, 295, 296-298, 299, 301, 303, 306, 308, 309-310, 314-315, 316, 317, 318, 319-320, 327.'] 1
['h Footnotes appear on the following pages: 4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16, 17, 18, 19, 20,21, 22, 24, 25, 28, 29, 30, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 54, 55, 56, 59, 62, 63, 64, 65, 68, 69, 70, 71, 72, 74, 75, 76, 77, 78, 79, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 95, 96, 97, 99, 100, 101, 104, 113, 114, 115, 116, 117, 118, 119, 120, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 134, 135, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 153, 154, 155, 156, 160, 170, 171, 172, 173, 174, 175, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 210, 211, 216-218, 220, 221-222, 223, 224, 225-226, 226-227, 228, 229, 230-232, 232-233, 234, 235-236, 244-245, 248-249, 251, 254, 255, 256-258, 259, 260, 261-262, 262-263, 264-265, 266-267, 269-270, 270-271, 272, 274, 276, 277, 278, 279-280, 280-281, 282, 283, 284, 285, 286, 287, 288-289, 291, 292, 293, 294, 295-296, 297, 298-299, 300-302, 305, 306-307, 308, 309, 312-313, 317-318, 318-319, 320, 321, 322, 323-324, 324-325, 326, 328-329, 335, 337, 342, 344-345, 346, 348, 349, 350, 352-353, 354, 355, 359, 360. The work is heavily footnoted, with multiple footnotes on most pages.'] 1
['Foonotes appear in Vol. I on pages 18, 22, 71, and 128; and in Vol. II on pages 3, 11, 18, 37, 129, 155, 156.'] 1
['A footnote marked with three asteriks appears on the title page below the publication information: "***If any persons should entertain any doubt whether these letters are really original he may recieve full satisfaction by calling at the publisher\'s."'] 1
['Footnotes that describe the world external to the novel appear on pages 1, 5, 15, 18, 19, 20, 21, 26, 35, 44, 45, 48, 68, 71, 73, 74, 77, 78, 79, 86, 87, 92, 99, 100, 101, 103, 106, 120, 123, 129, 132, 143, 147, 150, 152, 157, 159, 193, 198, 217, 225, 228, 232, 245, 253, 259, 267, 272, 273, 278, 285, 286, 304, 310, 320, 333, 347, 355, 359, 361, and 363. Many of these footnotes provide geographical and/or cultural information.', 'Footnotes that offer translations, definitions, or linguistic explanations appear on pages 5, 7, 8, 35, 37, 82, 108, 137, 138, 143, 145, 157, 159, 207, 216, 254, and 319.', 'A footnote primarily characterized by editorial voice appears on p. 87.', 'Footnotes which expand upon a story within the text (typically myths) can be found on pages 251, 252, 273, and 359.', 'On p. 289, a footnote references "a collection of fables" that "resemble very much those the Greeks have left us of Aesop."'] 1
['Footnotes that describe the external world appear on pages 28, 31, 36, 49, 61, 65, 109, 113, 120, and 188.', 'Footnotes that reference other works appear on pages 39, 54, 237, and 257.', 'Footnotes that offer editorial explanation or commentary appear on pages 62, 93, and 101.', 'Footnotes that provide translations or definitions appear on pages 62 and 65.', 'A footnote extending the fictional world of the novel appears on page 197.'] 1
['Footnotes on p. 82 of Vol. I and p. 253 of Vol. II explain that referenced letters have been omitted from the volume.', 'An asterik appears in both the text and next to the footnote at the bottom of the page.'] 1
['p.64'] 1
['Page 16 of v.1 : A down in Hampshire, on the borders of Sussex, the resort of both counties for cricket matches.', 'Page 3 of v.2 : When the ruins came to be cleared away, says Sir William Hamilton, the bodies of the men who had perished were universally found in the attitude of resistance; the women in that of prayer, unless it was those who had children with them, in which case they were observed to have taken such postures as were likely to shelter and protect them.', 'Footnotes throughout.'] 1
['Pg.22 of v.1 : A down in Hampshire, on the borders of Sussex, the resort of both counties for cricket matches.', 'Most appear in v.2'] 1
['Transcription p. 113 : See letter XX.'] 1
['Footnote is a transcription of an epitaph in Latin. (v.3, p.289)'] 1
['Descriptive/explanatory footnotes appear frequently throughout text, including in the paratext (in To the Reader)'] 1
['Multiple footnotes, most often referencing other letters or giving more information about characters'] 1
['Located pp. 282, 423, 425, v.1; and pp. 152, 161, 162, 163, 171, 235, 237, 238, 243, 522, v.2.'] 1
['Many footnotes throughout.'] 1
['Located on pg. 350.'] 1
['Footnotes in Preface and main text (v.1 and v.3-4) give textual citations and quotations in French and English.'] 1
["Footnotes throughout both paratext and text itself; in the Preface, reference is made to Goethe's The Sorrows of Young Werter, and some excerpts from that are quoted. On p. 43-45, a poem appears in an extended footnote."] 1
['Located on pgs. 42-3, 56, 175, 180-2, 184.'] 1
['Occasional explanatory footnotes. Located v.1: pp. 11, 14, 20, 39, 56, 86, 93, 95, 100, 106, 126, 127, 130. v.2: pp. 11, 57, 72, 73, 84, 85, 120, and so on.'] 1
['On pp. 135, 144, 147, 212, and 218.'] 1
['Occasional explanatory footnotes in both volumes'] 1
['Infrequent footnotes in volume I'] 1
['On p. 86'] 1
['Transcription : Coelebs, Vol. II. p. 100. 10th Edit. (Pg. vi)'] 1
['Several footnotes throughout the text.', 'Pg.38 in v.1 : A stoup is neither a bucket, nor a pitcher, nor a jar, nor an any thing but a stoup.'] 1
['Footnote cites a quote said by one of the characters.', 'Transcription : Corinne, Tome Premier, Chapitre iv. p.24.'] 1
['""Taming of a shrew""', 'Found on p. 37 of v.2"'] 1
["Pg. 75 : Three lines of Cooper's, speaking of Omai, a litle altered to suit the circumstance."] 1
['Each footnote is offset by an asterisk.', 'Transcription of first sentence of footnote on pg.21 of v.2: "The author hath by some been represented to have made a blunder here: for Adams had indeed shewn some learning (say they) perhaps, all the author had: but the gentleman hath shewn none, unless his approbation of Mr. Adams be such, but surely it would be preposterous in him to call it so."', 'Transcription of footnote on pg.52 of v.2: "Whoever the reader pleases."', 'Transcription of footnote on pg.78 of v.2: "All hounds that will hunt fox or other vermin, will hunt a piece of rusty bacon trailed on the ground."', 'Transcription of footnote on pg.145 of v.2: "Meaning perhaps ideas.', 'Transcription of footnote on pg.166 of v.2: "Lest this should appear unnatural to some readers, we think proper to acquaint them, that it is taken verbatim from very polite conversation.'] 1
['Lazzaroni, a word descriptive of people reduced to the utmost poverty and wretchedness.', 'Footnotes throughout quote Pope, Payne, Hayley and Shakespeare.'] 1
['Footnotes throughout the volumes provide translations, clarifications, and citations.', 'Pg.4 of v.1 : Alas! why have not I a Mama too.'] 1
['Various footnotes throughout clarify references made in the text.', 'Page 291 : This curious register is still in existence, being in possession of that eminent antiquary Dr. Dryasdust, who liberally offered the author permission to have the autograph of Duke Hildebrod engraved as an illustration of this passage.'] 1
['Located on pg. 58 of v.1, transcription : Motion--Puppet-shew'] 1
['Ladies are admitted into the gallery of the Irish House of Commons. (Pg. 54)'] 1
['Two footnotes cite quotations in text, see p. 108 and 158.', 'Transcription : Proverbs, chap. xxxi.'] 1
['Pg. 220 : Rousseau.'] 1
["Pg. 58 : Cumberland's fashionable lover."] 1
['Pg. 49 v.3: To lessen the number of agents necessary for the plan of this romance, the author has ventured occasional violations of the chronology[...]', 'Pg. 213 v.4: This incident is an historical fact.'] 1
['Footnotes throughout.', 'Pg. 186 : Rousseau.', 'Pg. 255 : footnote under HARAM: The quarter of the house or palace appropriated to the use of the women, and the children.'] 1
['Transcription of footnote on pg. 164 : A philosophical composition of our author.'] 1
['Footnotes throughout providing clarification for the text.', "P. 124 of v.2 : Vide Southey, from D'Herbelot."] 1
['A bird of the grous kind, common in the highlands of Scotland.'] 1
['Denoted with an asterisk in a sentence on page 113: "French, and see* their wretched policy, their sparkling, but sophistical discourse, frivolous occupations, and, withal, their gay animated air, we shall be compelled to acknowledge that happiness and folly too often dwell together."', 'Footnote transcription: "It must be remembered that this was said in the seventeenth century."'] 1
['Transcription of footnote marked by an asterisk on pg.148: "A part of the Highland dress which serves instead of breeches."', 'Transcription of footnote marked by a cross on pg.148:"The Highland broad sword."', 'Transcription of footnote marked by an asterisk on pg.218: "Nigg-nyes, or bawbles."', 'Transcription of footnote marked by an asterisk on pg.224: "Cheerful."', 'Transcription of footnote marked by an asterisk on pg.225: "Thrown away."', 'Transcription of footnote marked by an asterisk on pg.227: "Without."', 'Transcription of footnote marked by an asterisk on pg.380: "The Author has been informed that this distinction between the effect of audible and of visible objects, has been criticised as unnatural."', 'Transcription of footnote marked by an asterisk on pg.435: "Mason."', 'Transcription of footnote marked by an asterisk on pg.493: "Wale, or choice."'] 1
['Located on pg. 93. Transcription: "Smollet\'s Count Fathom, I."'] 1
['Transcription of footnore: The Messiah was formerly published in three volumes.'] 1
['Located on p. 15. Transcription: "Inconsiderate woman! woud you preserve your heart from the contagion which approaches you, write no more; write again, and you are undone."'] 1
['Footnotes on p. 79'] 1
['Footnotes on pgs. 87 and 171'] 1
['p. 108, 112, 117, 164, 195'] 1
['Located on p. 168, 201'] 1
['Footnote offset by an asterick referring to the Cavern of Death (p. 23): "Die Hole des Todes, is the name which is still retains ; and the neighboring Peasantry at this day dread to approach it, and entertain many wild and superstitious ideas respecting it."'] 1
['Transcription of footnote on p. 53: This trait of sagacity in the black Domingo and his dog, Fidele, very much resembles that of the savage Tewenissa, and his dog Oniah, mentioned by M. de Crevecaur, in his humane Work, entitled, Letters of an American Farmer.'] 1
['Footnotes located on p. 65, 99, 112, 134, 182, 188, 189.'] 1
['Footnotes appear on pages: VII, 6, 9, 28, 42, 48, 49, 50, 57, 58, 61, 65, 68, 71, 73, 77, 82, 85, 86, 87, 88, 90, 92, 94, 95, 96, 100, 101, 103, 111, 115, 116, 124, 127, 130, 131, 135'] 1
['Transcription of footnote on p. 114: Mary had been taught, besides, to address a short prayer to the Deity, immediately upon waking, to acknowledge his goodness in having defended her from all "perils and dangers of the night," a tribute no grateful mind can neglect to pay.'] 1
['Footnotes on nearly every single page with text. These footnotes contain proverbs and moral teachings with some being taken from the Bible. Transcription of first sentence of the footnote on p.9: "Satan could gain no advantage over Adam in paradise without the concurrence of his judgement, and consent of his will; but these being weakened, and at length overcome, by listening to the temptation." Transcription of first sentence of the footnote on p.17: "Beware of flattery and hypocrisy: especially of that cunning craftiness of false teachers, whereby they lie in wait to desceive unwary souls, having itching ears."'] 1
['Footnotes on pages: 341, 342, 343, 344, 346, 348, 350, 351, 352, 354, 355, 356, 357,358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 370, 371, 372, 373, 374, 375, 376, 377 378, 379, 380, 381, 382, 383, 384, 386, 389,390, 391, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 414, 415, 416, 417, 418, 424, 426, 431, 432, 433, 434.'] 1
['Transcription of footnote on p.173: "This was not meant to be Petrarch\'s Laura: the Bard seems to have thoughtlessly struck on the same name in the beginning of her Elegy. Note of the Editor."'] 1
['Located on p. 173. Transcription: "The writer was witness to a scene similar to the above not three years ago."'] 1
['Footnotes located on pgs.: 35, 49, 56, 67, 70, 80, 85'] 1
['Offset by an asterisk.', 'Transcription: "See on a subject similar to this, the admirable story of Mr. Wentworth, No.57 of the Mirror, (a periodical Paper) very deserving of attention."', 'Located on pg.58.'] 1
['Located on p. 153. Reads: "Those unacquainted with eastern history may consider it as an impropriety in Montesquieu to have made his hero so young: But Nature there, cherished by the warmth of the climate, shoots up with amazing celerity; and the records of Hindostan inform us, that the Sons of Shah Jehan one of the Mogul Sovereigns, commanded armies at twelve years of age."'] 1
['Footnotes on p. vii, ix, and xi.'] 1
['Footnotes on pages: 45, 57, 58, 67, 105, 127, 195.'] 1
['Located on pgs. 55, 242.'] 1
['Located on pgs. 3, 4, 5, 6, 8, 10, 11, 13, 14, 15 of Monody on Major Andre.'] 1
['Footnotes on pages: 6, 59, 87, 145, 365.'] 1
['Dedication has paragraphs-long footnotes on pgs. vii, xi, and xiv.'] 1
['Located on pg. 40.'] 1
['Located on pgs. 35, 49, 56, 67, 70, 80, 85.'] 1
['Footnotes on pp. 1, 9, 38, 127, 147, 163, 164, 165, 166, 176-7, 178, 181, 184, 185, 186, 187, 189, 192, 193'] 1
['Footnotes on p.7-8, 39.'] 1
['Located on pg. 83.'] 1
['Located on p. 5. Reads: "Duty on Salt."'] 1
['Located on p. 128. Reads: "A trasaction like this took place, not out of rememberance, nor a hundred miles from Nottingham."'] 1
['Location: pg. 43; Transcription: "So called from a corps of infantry called Buckler Men, who fought with sword and target, and whose gallant stand under the shelter of their bucklers, which were exceedingly large, saved the right wing of the army, commanded by the father of Emanuel in person, and turned the fortune of the day."'] 1
['Transcription of footnote on p.3: "When the ruins came to be cleared away, says Sir William Hamilton, the bodies of the men who had perished were universally found in the attitude of resistance; the women in that of prayer, unless it was those who had children with them, in which cafe they were observed to have taken such postures as were likely to shelter and protect them." Transcription of footnote on p.30: "A Carlini is 5d. in English"'] 1
['Transcription of the footnote on p.145-146: "Thomso - It has struck me as remarkable, that the French (I speak of them as they were), who either read English poetry in the original, or translated into their own language, preferred Young and Thomson, to out gayer and lighter poets, which, like their passion for tragedy, seems to contradict the generally received opinion of their national character."'] 1
['Middle', 'p. 92, "*"What a fine goose," was his saying, "that lays me so many golden eggs!"'] 1
['Transcription of first sentence of footnote on pg.26: "I trust I shall be excused the liberty I have taken with chronology (on more than one occasion) in making Lady Ormond quote passages written since her time."', 'Footnote offset by an asterisk.'] 1
['Transcription: "Sir Edward Stanley\'s letter to the Earl of Bleville, giving an account of Mrs. Montague\'s death, and the disposition of her fortune, was omitted to prevent tautology."', 'Located on pg.192 of text.'] 1
['"*It must be remembered that this was said in the seventeenth century." (p.113)'] 1
['Located on page 323', 'Transcription: twenty four books of Hananias* "The books of the Prophets."'] 1
['Transcription of first two sentences: "The favourable manner in which this little piece has been received by the public, calls upon the author to explain the grounds on which he composed it. But before he opens those motives, it is fit that he should ask pardon of his readers for having offered his work to them under the borrowed personage of a translator."', 'Transcription of first sentence: "The following remark is foreign to the present question, yet excusable in an Englishman, who is willing to think that the severe criticisms of so masterly a writer as Voltaire on our immortal countryman, may have been the effusions of wit and precipitation, rather than the result of judgment and attention."', 'Located on pgs.XXII-XXIII of the preface to the second edition.', 'Marked by an asterisk in the statement "Voltaire is a genius"'] 1
['Located on page 234. Transcription:"*1789."'] 1
['Transcription of footnote located on pg.98: "The author cannot help here remarking that as this story is authentic, and not the offspring of fancy, she hopes it will make a lasting impression on the minds of her fair readers."', 'Transcription of footnote located on last page of text (pg.288): "N.B. The Two last Tales have formerly appeared in a Magazine."'] 1
['Transcription of footnote: "Terms used in heraldry" pg.58'] 1
['Located on pg.101. Transcription: "The par is a small fish, not unlike the smelt, which it rivals in delicacy and flavour."'] 1
['Located on page 20, denoted with an asterisk. Transcription of denoted sentence: "But read the letter yourself; the following is a true copy of it*." Transcription of footnote: "The reader has already perused it in Letter XCIV. to which he is referred."'] 1
['"This alludes to an antient pile of four rough and enormous stones on the hill; which some antiquaries have called a druid altar--but it seems more probably to have been raised as a sepulchral monument." p.2'] 1
['How much more rationally might many of the social hours of young people be employed in reading some book of entertainment and improvement, than in the too common custom of wasting them at cards, or in vain and unmeaning conversation.', 'The numbers are from Joseph Addison\'s "The Spectator"'] 1
['Located on page 149. Transcription: "This letter does not appear."'] 1
['"Remains of the ancient people of Vaudois, who inhabited the vallies lying between Provence and Dauphiny in France. For an account of them see Voltaire\'s works."'] 1
['Longest footnote in entire work appears on the first page of the main text: "All the effects of strangers (Swiss and Scotch excepted) dying in France, are seized by virtue of this law, though the heir be upon the spot--the profit of these contingencies being farm\'d, there is no redress."'] 1
['"*This tale is explanatory of two celebrated pictures of Poussin."', 'p.130'] 1
['Footnote located on pg. 189 used to accredit a quote to Shakespeare. Transcription of accredited quote: "O, it came o\'er mine ear, like the sweet south, --- That breathes upon a bank of violets --- Stealing and giving odour". Quote taken from Shakespeare\'s Twelfth Night. Transcription of footnote: "Shakespeare."'] 1
['Transcription of footnote on p. 193: "Sir William Blackstone in his Commentaries."'] 1
['Footnote is marked by an asterisk on pg.vi of the preface. Transcription of footnote: "Mr. Rousseau is mistaken here. The Old Robinson Crusoe has plenty of tools and instruments, which he saves from the wreck of a ship; whereas the New Robinson Crusoe has nothing but his head and his hands to depend on for his preservation."'] 1
['Takes up half of page 139 and most of 140. Transcription of first sentence: "Of this the good Doctor, mentioned in the First Volume, has given a striking proff, which I shall here relate."'] 1
['Located on page 237. Transcription of first sentence: "The body of the duke of Exeter was, not many years ago, dug up from amidst the ruins of the abbey of Bury St. Edmunds, in perfect preservation."'] 1
['Transcription: "See Vo. IV. page 24."', 'Footnote is marked by an asterisk on pg.82 in the following the part of a sentence that reads, "\'...evils inevitable are always best supported, because known to be past amendment, and felt to give defiance to struggling.\'*"'] 1
['Located on page 163. Transcription: "I cannot resist the inclination I feel to quote a reflection of M. de Sainte-Foix, nearly on the same subject..."'] 1
['p. 68. Transcription: "*This letter does not appear."'] 1
['p. 213. Quotes Dr. Deane on the term "husbandry."'] 1
['Located on pg.82. Transcription: "The par is a small fish, no unlike the smelt, which it rivals in delicacy and flavour."'] 1
['On p. 189: "The manuscript was so exceedingly blotted in this place, that it was hardly legible."'] 1
['p. 8, "If this was self-evident in 1793, it is much more so now..."; p.196, "Such dialogues as these have passed, with the additional circumstances of..."; p. 211-212, "Thomson.---It has often struck me as remarkable, that the French, (I speak of them as they were) who either read English in the original, or translated..."; p. 215; p. 218; p. 227; p. 233'] 1
['Located on pg.255. Transcription: "A military man might indulge himself in this sally; but, the author disclaims it, being assured that there are in this country some exceptions, who from their united abilities and integrity are an honour to a profession which is generally, and not unjustly, stigmatised as the scourge of society."'] 1
['Located on page 235. Transcription: "A patrole or watch in Spain."'] 1
['Located on pg.235. Transcription: "A patrole or watch in Spain."'] 1
['Located on page 373. Transcription: "The Hudson and the East-River or Sound, which meet at the south-west end of the city."'] 1
['on p. 148 and 185'] 1
['Footnotes, most of which give historical context and citations, appear throughout text.'] 1
['Appear frequently throughout the text'] 1
['Usually informational, throughout the text. Some reference other points in the text, reference other texts, or even contain other texts in the form of poems by authors like Alexander Pope.'] 1
['Located p.232', 'Transcription: ""Aesculapius was the son of Apollo by the Nymph Coronis; his father gave him into the hands of Chiron the centaur to study Physick""."'] 1
['Pg. 193, 196, 205, 207. In the voice of the editor, comments on missing or illegible (coded) letters.'] 1
['Footnote, p.69, is in Mr. Dorrington\'s voice: ""The ship belonged to Bristol, I communicated following memoirs to a friend in London, in order to be published...In the meantime, I have business calls me to Peru and Mexico again...And so I take my leave, and end the first part.""', 'Footnote, p.106, explains use of quotation marks: ""The lines marked with ("") are set down word for word with his memoirs, for these reasons, 1. I thought it a pity to alter any thing from his notions of the pleasures of a country life. 2. And the rather, because it gives us very lively ideas of the perfect happiness he enjoys in his solitude."'] 1
['Several footnotes indicated by asterisks or crosses: 143, 144-145, 170, 209,', '""The translator\'s compliments to the reader, and is sorry to trouble him too son with another note. Begs leave, however, to assure him upon the faith of the following italics...""', "Unclear if both footnotes are part of the Diploma narrative form adopted v.2, chap.VI or if they author's commentary on the Diploma section.", 'Footnote indicated by asterisk (143-144): ""If the affaire d\'honneur related in Chap. XVI. vol.1 be here recollected...But it is no business of ours to enter into any disquisition upon that Head, being obliged only to narrate facts, not to reconcile things that are in themselves contradictory...""', 'Footnote indicated by asterisk (170): ""Whether the doctor did not contradict himself a little here, we leave the reader to determine"".', 'Footnote indicated by asterisk (209): ""The reason that the learned world have not yet been favoured with this valuable work, we are intirely [sic] at a loss to account for""."'] 1
['Footnotes appear in v.1 on pages: 49, 52, 71, 102, 114, 125, 171, 189, 194, 237,', 'Footnotes appear in v.2 on pages: 130, 131, 137, 163, 234,', 'Footnotes include information about characters, reference, and clarification'] 1
['First footnote: ""As this conclusion of the foregoing character is so short, it is inserted to preserve the connection, and save the reader the trouble of referring."'] 1
['""* \'Tis presumed by the compiler of these sheets, that the explanation of this heroglyphick, is the peculiar province of the pseudo-Nazarene students, grazing upon the banks of Isis, and famous for inventing annually dark and amusing AEigma\'s."'] 1
['In Latin.'] 1
['Located v.1 p.59, 108, 112, 113, 145. V.2 p. 204, 252. V.3 p. 56-7, 204.', 'Explanatory and citational footnotes, of people, works, and places referenced in narrative.'] 1
['Explanatory footnotes. Likely be original to the English translation.'] 1
['Mostly informational footnotes beginning in the introduction and continuing throughout the text. Notes are often long, and there are often more than one per page.', 'Notes are written in the voice of an editor, who the title page claims is Signor Rhedi (""with critical notes of the learned Signor Rhedi""). There is also at least one note commenting on the editor\'s notes."'] 1
['Footnotes appear on pages 4, 7, 8, and 66'] 1
['The footnotes appear on pages 72, 81, 104, 114, 147, 150, 160, 173, 180, 197, 198, 233, 249, 254, 263, 265, 273, 274.'] 1
['Footnotes appear on pages 17, 20, 26, 30, 32-4, 37-8, 43, 47-8, 51-2, 54-8, 65-7, 75, 77, 79, 82, 91-3, 98-9, 105, 107-8, 110, 114, 117-8, 128, 138-40, 142, 157-61, 197, 203, 208, 211, 218, 222, and 239.'] 1
['Footnotes marked with asterisk, occur throughout text. Internal references, references to and quotes from other works, some explanatory footnotes. Significantly more footnotes in v.3 and v.4 than v.2.'] 1
['In v.1, information about letter writer and addressee footnoted with: ""This letter was intercepted by Miss Penelope Fitzroy, and never delivered to her sister"" (p. 114). In v.2, footnote explains that one character is alluding to ""The Gordian Knot"" (p. 266)."'] 1
['Footnotes appear on the following pages - v.1: 20, 42, 64, 69, 70, 90, 92, 93, 113, 126, 133, 149, 152, 153, 154, 162, 182, 197, 214, 232, 233, 235, 236, 245, 247; v.2: 94, 199, 203, 216, 217, 218, 262, 273, 280, 282, 294, 304,'] 1
['Explanatory footnotes on pp. 33, 70, 79, and 85.'] 1
['Footnotes give geographical information, writings by ancient Romans, description of Turks and Moors and their culture, laws/customs, historical information.', 'Located in v.1 on pages 5, 24-25, 141-142, 179, 181.', 'Located in v.2 on pages 102, 115, 117 122-123, 124, 126, 128, 140, 153-155, 157, 174, 193(191)-192, 194-195, 200, 205, 219, 243, 255.'] 1
['A factual note about a place name, located on the first page of the text.'] 1
['One note at the end of a letter, located on page 216.', 'Transcription: ""See the continuation of her story in Part II. p. 286."'] 1
['Pages 49, 50, 51, 54, 55, 56, 57, 58, 62, 65, 67, 68, 70, 71, 76, 77, 81, 83, 87, 92, 97, 98, 100, 101, 103, 104, 173, 175, and 319.'] 1
['First footnote: ""*Essay on ancient and modern learning."'] 1
['Informational footnotes throughout the book, some of which reference other works.'] 1
['Acts as a citation for a quotation.', '""Le Comte, Vol. 1 p. 210."'] 1
['""* The same for Paris as Tyburn for London."'] 1
['First page of v.1 and last page of v.2 have notes at the bottom of the page, not marked with an asterisk or other symbol and not clearly referring to specific sections of the text, explaining the moral message of the text.', '""These voyages are intended as a moral political romance--to correct vice by shewing its deformity in opposition to the beauty of virtue, and to amend the false systems of philosophy by pointing out the errors, and applying salutary means to avoid them. ORRERY. (v.1)', '""To mortify pride, which indeed was not made for man, and produces not only the most ridiculous follies, but the most extensive calamity, appears to have been one general view of the author in every part of these travels. Personal strength and beauty, the wisdom and the virtue of mankind, become objects not of pride but of humility, in the diminutive stature and contemptible weakness of the Lilliputians; in the horrid deformity of the Brobdingnagians; in the learned folly of the Laputians, and in the parallel drawn between our manners and those of the Houyhnhnms."" (v.2)"'] 1
['On pg. vii-viii, xii-xv, xvii, xx, xxxvi-xxxvii, xxxix-xlii, xlv, xlix-li, lvi-lvii, lxiii-lxviii, 1, 5-7, 9, 11-3, 15-8, 20, 22-7, 29-30, 32-42, 49-53, 57, 62-9, 71-3, 76-80, 83, 85-6, 94-5, 101-3, 110, 113-4, 116-7, 119-22, 124, 129-32, 135, 137, 144-5, 147-8, 150, 153-4, 156-9, 164-6, 168-73, 177-81, 183-4, 186-7, 194, 197, 202, 205, 208-9, 215, 218-9, 224, 226-8, 230-5, 238-42, 249, 255, 277-8, 262, 273, 276, and 278-81.', 'Indicated by asterisk or cross.'] 1
['On page v, viii, ix, x, xi, xiii, 4, 5, 7, 9, 12, 14, 15, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 50, 51, 52, 54, 57, 58, 59, 60, 61, 62, 63, 65, 66, 67, 69, 70, 75, 76, 79, 80, 84, 85, 86, 87, 88, 89, 90, 91, 94, 95-96, 98, 99, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 123, 124, 125, 126, 128, 137, 140, 142, 144, 147, 149, 150, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 169, 173, 174, 177, 183, 185, 186, 187, and 188.', 'Indicated by asterisk or cross.'] 1
['""*Lycomene is a month in which the moon has a more than ordinary effect on the Otipolians; for then every man becomes in perfection which his genius in an inferiour degree prompted him to be all the rest of the year."'] 1
['One explanatory footnote, p.2'] 1
['Footnotes are indicated by an asterisks or a cross.', 'Footnotes occur on pages 107,173, 177, 178, 180, 185, and 186.'] 1
['Footnote on I.5'] 1
['Footnotes appear on the following pages - v.1: 20, 42, 64, 69, 70, 90, 92, 93, 113, 126, 133, 149, 152, 153, 154, 162, 182, 197, 214, 232, 233, 235, 236, 245, 247; v.2: 94, 170, 199, 203, 216, 217, 218, 262, 273, 280, 282, 294, and 304.'] 1
['First footnote, p. iv: ""* Begum-Saheb, Aureng-Zebe\'s Sister, tended on Chah-Jehan during his imprisonment ; and when he was dead, she made Aureng-Zebe a present of a large golden bason, which contain\'d all Chah-Jehan\'s precious stones, and her own.""', 'On pg. iv, vi, x, xiv, 1, 2, 3, 4, 5, 6, 11, 12, 15, 16, 19, 23, 24, 28, 30, 33, 44, 48, 49, 51, 55, 59, 64, 65, 76, 81, 90, 113, 114, 127, 139, 141, 142, 143, 144, 148, 155, 159, 164, and 167."'] 1
['Separated from text by horizontal line, which comment upon the narrative text and on the character Gulliver and are signed either ""Hawkes"" or ""Orrery"'] 1
['Footnotes appear on pages: 73, 79'] 1
['Footnotes appear on pages vii,xii, 5-6.'] 1
['Footnotes on pages 3, 6,8, 11, 13, 39, 53, 85, 88, 89, 136, 146, 164'] 1
['p.243'] 1
['Several footnotes throughout'] 1
['Footnotes in astericks for Baucis and Philemon.--A letter to a young lady.--Verses on the death of Dr. Swift.'] 1
['p.3'] 1
['A footnote appears on v.2 page 30.'] 1
['Explanatory footnote appears in ""Supplement."'] 1
['Footnotes explaining certain characters or personages in the text'] 1
['Footnotes noted with astericks throughout'] 1
['Explicate allusions made to other works in text'] 1
['Occasional, both volumes, text, paratext.'] 1
['p.61'] 1
['pg. 9, 194'] 1
['Appear on pages 102, 202'] 1
['Footnotes appear on pages: v.1: 16, 35, 53, 57, 58, 75, 77, 78, 95, 106, 112, 122, 126, 129, 130, 137, 155, 168, 183, 198, 199, 200, 201, 209, 211; v. 2: 79, 133,169, 171, 183, 185, 222, 232, 238, 240, 250, 258.'] 1
['pp. 75'] 1
['Footnotes appear in v.1 on the following pages: ix, xx, xxx, 1, 2, 51, 52, 214; Footnotes appear in v.2 on the following pages: 305; Footnotes appear in v.3 on the following pages: 285'] 1
['On xxi, 15, 207'] 1
['A Footnote occurs in v.1 on page 2.'] 1
['Footnotes appear on the following pages: 16, 17, 24-5, 26, 28'] 1
['Footnotes appear on pages: 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 22, 23, 24, 30, 31, 36, 37, 43, 46, 55, 56, 59, 67, 72, 74, 81, 84, 86, 91, 203, 212, 214, 246, 257, 259, 260, 261, and 280'] 1
['I.50, I.163-4, II.10-1, II.59, II.70, II.209, II.210, II.211, II.214, II.223, II.241-2, II.243-5, II.249, II.269'] 1
['Footnotes appear on pages 9 and 71-72.'] 1
['Footnotes appear on pages 2,3,4,5,6,8,11,14,16,18,22,28,36,41,43,44,47,48,49,50,51,52,54,56,60,61,62,78,79,82,85,86,88,89,90,91,92,93,95,97,98,100,102,104,112,119,137,148,149,164,166,168,169,170, 173,175,178,180,191,192,193,194,198,202,205,206,207,209,214,215,216,221,222,223,225,227,229,231,232,233,235,239,242,243,244,245,246,247,248,254,260.261,268,270,273, 278,288,292,304,311,312,315,317,319,324,326,327,334,345,336,337,338,340,342,343,344,350,351,352,353,354,355,356,358,362,363,369,371,376,385,388,389,390,391,392,397,398,401,402,404,415,416,418,421,431,432,435,436,440,441,442,444,448,460,463,465,467,474,475,496,496,498,502,503,505,506,509.', 'Appear in the margins of pages'] 1
['Footnote on III.265'] 1
['Page 265'] 1
['Appears on page 7 of volume one.'] 1
['I.43, I.89, I.101, I.141, II.19, II.21, II.136, II.137, II.141, II.143, II.217'] 1
['Appear on pages 17, 44, 50, 54, 59, 189, 229.'] 1
['Footnotes appear on pages: v.1: 86, 90, 99, 182; v.2: 26;'] 1
['Numerous footnotes throughout text. Includes some references to other works; mostly explanatory footnotes.'] 1
['One footnote, bibliographic reference, p.vi.'] 1
['On p. 361: ""The reader who has perused Mr. Boswell\'s Journal of a tour to the Hebrides, (see page 409) and recollects the period when these letters must have been written, will doubtless be as much struck with this passage as the editor was before him.""'] 1
['Text contains two footnotes. First: ""The reader will remember, that the editor is accountable only for scattered chapters, and fragments of chapters; the curate must answer for the rest"" (p. 1). Second: ""Though the curate could not remember having shown this chapter to any body, I strongly suspect that these political observations are the work of a later pen than the rest of this performance"" (pp. 78-9)."'] 1
['Footnotes on pages 62, 81, 93, 96, 98, 145, 146, 172, 178, 179, 191, 193, 194, 211, 226, 236, 254, 311, 312, 383, 406, 448, 453, 464, 480, 490, 493, 494, 498, 504, 506, 510, 511, 524'] 1
['Occasional explanatory footnotes; these seem to appear only in v.1.'] 1
['Explanatory/Descriptive footnotes in Preface and throughout text'] 1
['Footnotes throughout front matter and main text.'] 1
['Located on pgs. 40, 142.'] 1
['Footnotes indicated by asterisks and crosses. Found on pp. 107, 109, 123, 137, 158, 163, 164, 165, and 171.'] 1
['On p. 65 and 228 of v.1 elaborating on things said in the text'] 1
['On p. 233. Translates a phrase written in another language'] 1
['Scattered footnotes translating French character dialogue'] 1
['""Subscriptions are taken in by John Murray, bookseller, no. 32, Fleet-Street...."'] 1
['Most footnotes explain elements of Spanish and Italian language and culture. Examples on pages 17, 49, 56, 64'] 1
['On pp. 9 and 24.'] 1
['Work includes a single footnote: ""See Catullus"" p32"'] 1
['Explanatory footnotes throughout.'] 1
['On p. 245.'] 1
['Explanatory footnotes, which appear mostly in the beginning of the text.'] 1
['Located on pages v.7 pages 369, 375, 378 and v.8 pages 385, 387'] 1
['This footnote is found on page xvii, in the Preface to the second edition. It extends over two pages and begins: ""The following remark is foreign to the present question, yet excusable in an Englishman, who is willing to think that the severe criticisms of so masterly a writer as Voltaire on our immoral countrymen, may have been the effusions of wit and precipitation, rather than the result of judgment and attention."'] 1
['Located throughout'] 1
['Located on pages 14, 29, 74, and 170'] 1
['Occasional footnotes throughout text, mainly explanatory'] 1
['Interesting footnote format; notes are embedded midtext as opposed to typical location in bottom margin.'] 1
['Work contains a single footnote, located p. 13. Transcription: ""Like me, the reader must take this for a satire rather than for Mrs. Verman\'s real opinion: though seemingly in earnest, she had too much sense not to know the errors she commended. Had she painted them to her daughter under their true colours, she would not have acted the part she wanted to perform.""'] 1
['Work contains a single footnote, located p. 13. Transcription: ""Like me, the reader must take this for a satire rather than for Mrs. Verman\'s real opinion: though seemingly in earnest, she had too much sense not to know the errors she commended. Had she painted them to her daughter under their true colours, she would not have acted the part she wanted to perform."'] 1
['pg. 402'] 1
['Satirical footnotes on nearly every page of text in ""Commentary on Gotham"" and ""Gotham and the Gothamites."'] 1
['Footnote located p. 40. ""The grey cougar. This animal has all the essential characteristics of a tyger..."'] 1
['Occasional, explanatory footnotes. Example on p. 120: ""Wilson, it seems, asked the old woman if the same man that knocked out her eye, bit off her nose!"'] 1
['In appendix, footnotes on pp. 170 and 213 citing Bible quotes.'] 1
['Located pgs. 5, 13, 35.'] 1
['Located pg. 16, 20, 21, 26, 28, 51, 52, 66, 69, 78, 79, 86, 99, 102, 111, 114, 116-7, 129, 132.'] 1
['Explanatory footnotes throughout text'] 1
['Extensive footnotes throughout both volumes. Many of these are explanatory/clarifying footnotes referring readers to other moments in the text (e.g. ""See Vol. II, p. 153""), and/or reminding them of earlier events. Footnotes are also used to respond to critiques of the novel."'] 1
['Located throughout. All are indexical. Many pages have multiple footnotes.'] 1
['P.33, bibliographic, p.119 explanatory, p.179 explanatory.'] 1
['Footnotes throughout the first half of volume 1. All footnotes preceded by asterisk. Majority give explanatory definitions or provide references to other works (i.e., footnote p.29, v.1: ""Vid. Hist. Eccles. Japan. Vol. I."")."'] 1
['""* When the reader considers our author not only as a foreigner but a Roman Catholick, he will not be startled when he meets with any such invectives on our church and state."'] 1
['On pages 2, 94, 115 and 225. Informational.'] 1
['Footnotes provide background information about historical events and figures, sometimes citing the source of this information.'] 1
['Historical citations for events that happen in the text.'] 1
['""Queen\'s Royal Licence] Queen Elizabeth would not admit the East India Company, at her first granting them to be a Corporation, to transport the King of Spain\'s silver coins into the East Indies, though the Merchants pressed it often, telling her, that her coin and stamp were not known in the East Indies, they thinking to get licence to send thither what silver they pleased."'] 1
['More frequent in prefatory essay than main text. Each footnote marked with asterisk. Some provide detailed definitions of places and objects mentioned in the text; majority cite other works, including the Bible.'] 1
['Many footnotes interspersed throughout the book.'] 1
['Footnotes on pp. 85 and 177 give background information on the island of Jocunda.'] 1
['Footnotes on page 76. Marked by asterisks.'] 1
['Include references to other works and explanatory notes. Some footnotes marked by asterisks, others by daggers.'] 1
['Footnotes found on pages: 6, 9, 19, 41, 127, 153.', 'Footnotes give information about fruit, customs, opinions.'] 1
['Footnotes expand upon ideas introduced in the text, but do not seem to be directly relavent to the narrative. For instance: ""*Horace in his satires, takes particular notice of ingratitude &c."" (p50, v.1)"'] 1
['Footnotes on pp. 69 and 72. Footnote on p. 69 corrects information given in one of the letters.'] 1
['Page number and short chapter summary for each chapter.'] 1
['Frequent until p. 75 after which they disappear'] 1
['On pp. 66, 70, 148. Footnotes give additional information and the ""editor""\'s opinion on theological issues."'] 1
['On pg. 247, in advertisement.'] 1
['Example: ""a few grunter\'s gigs"" footnoted ""A cant term for shillings"" (147)."'] 1
['Includes pages 88, 98, 104, 164; tend to be citations and references to translations, etc.'] 1
['Located on pages: 37, 38, 72'] 1
['Footnotes throughout text. Some are multipage length, example p204-215'] 1
['Explanatory footnotes found throughout all four volumes'] 1
['Located on page 68, 104, 129, 140'] 1
['Footnotes on pages 48, 50, 138, 214'] 1
['Two footnotes explain nautical terminology on pp. 90 and 120.'] 1
['pg. 148, second part'] 1
['Found on page 246'] 1
['Footnotes on pp. 50, 51, and 113.'] 1
['Explanatory footnote, marked by asterisk, p.47'] 1
['Explanatory footnotes throughout. Significantly more footnores in v.1 than v.2-v.3'] 1
['p.14. Marked by asterisk. ""To hope always, and never enjoy."" Translation of words in text, ""sperar sempre, non gioir mai."'] 1
['Located on p. 355, 360, 368.', 'Footnotes are located in ""A letter from the facetious Dr. Andrew Tripe..."'] 1
['Located on p. 434.'] 1
Name: notes, dtype: int64
# Extract just the 1-3 digit numbers
# NOTE: this method will also end up catching *VOLUME NUMBERS* (e.g. the "2" in "v.2")
# see later section on plotting for how we handle the 76 instances of volume numbers
END_footnotes_data_just_footnotes.notes.str.findall(r'[0-9]{1,3}')
END_footnotes_data_just_digits = END_footnotes_data_just_footnotes.notes.str.findall(r'[0-9]{1,3}')
# Let's look at just the roman numerals in the page numbers
# NOTE: we'll also expect to see 'v's" from the volume markers
# see later section on how why the "v's" are removed
END_footnotes_data_just_footnotes.notes.str.findall(r'(?i)\b(?=[ivxlcdm]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})\b')
230 [] 240 [] 285 [] 301 [] 312 [, ] 319 [] 339 [, , ] 348 [] 370 [] 390 [] 394 [, , ] 395 [, , , , , ] 403 [] 419 NaN 524 [] 579 [] 580 [] 581 [] 582 [] 585 [vii, xii] 587 [iv, v, xi, xii, xxi, xxii, xxiv, xxvi, xxvii, xxx, xxxi] 588 [iv, v, xi, xii, xxi, xxii, xxiv, xxvi, xxvii, xxx, xxxi] 591 [] 592 [] 600 [v] 601 [] 608 [, ] 609 [v, v] 610 [] 611 [I, I, I, I, II, II, II, II, II, II, II] 612 [II, III, III, III, III, III, III, III, III] 613 [II, III, III, III, III, III, III, III, III] 615 [] 616 [] 617 [III] 618 [] 619 [] 623 [I, I, II, II, II, II, II, II, II, II, II, II, II, II] 626 [] 628 [] 632 [, v] 638 [xxi] 639 [, v, ix, xx, xxx, , v, , v] 644 [v, v] 645 [v, v] 646 [v, v] 649 [I] 651 [, , , , , , , ] 652 [, , , , , , , ] 662 [] 665 [v, v] 667 [] 668 [v, , vii, v, , v, , v, ] 669 [] 671 [, , I, ii, ] 672 [ii] 673 [, , , , ] 677 [, , , , , ] 708 [] 711 [v, v] 713 [] 714 [v, , vii, v, , v, , v, ] 715 [] 717 [, , I, ii, ] 718 [ii] 719 [, , , , ] 723 [, , , , , ] 756 NaN 762 [] 763 [] 767 NaN 769 [] 777 [] 778 [] 780 [, , , , , , , , , , , , , , , ] 783 [] 784 [, v, V, V, , ] 789 [, , , , ] 796 [, , , , ] 809 NaN 810 [, , , , , , ] 817 NaN 818 [, v, , v, , , , ] 819 [, , , , , , , , v, , VI, , , , , , d, , , XVI, , , , , , , , , , , , , , , , , , , ] 830 [, , , , , I, , , , , , , , I, , , , I, , , , , , , , I, , , , , , , , ] 831 [, , , , , , ] 835 [, , , , ] 837 [, , , , v, v, v] 838 [, ] 842 NaN 843 [, v, , , , , , , , v, , ] 848 [] 854 [, ] 855 [] 862 [, , , , , , , , , , ] 864 [v, viii, ix, x, xi, xiii, , ] 865 [vii, viii, xii, xv, xvii, xx, xxxvi, xxxvii, xxxix, xlii, xlv, xlix, li, lvi, lvii, lxiii, lxvi... 867 [v, , v, , , , , , , , , , , , , , , v, , , , , , , , , , , , , , , , , , , , , , , , v] 869 NaN 870 [, , , ] 874 [] 877 [] 879 [, ] 880 [] 881 [, , , , II] 883 [] 888 [, , , , , , , , , v, , , v] 890 [, ] 897 [iv, , , , , , , , d, , iv, vi, x, xiv] 900 [vi] 904 [] 905 [, ] 907 [, , , ] 910 [, , , , , , , , c, v] 912 [, ] 913 [, , ] 914 [] 915 [, ] 919 NaN 921 [, , , i, v, , , I] 924 [, , , , , , , , , , ] 927 [, , , , , , , , , , , , ] 931 NaN 932 [, ] 933 [, , ] 935 [] 937 [, , , , ] 944 NaN 947 [] 951 [, ] 954 [, ] 961 [] 962 [, , , , ] 970 [, , ] 972 [, , v, v, v] 983 [] 985 [] 987 [] 989 [] 992 [] 993 [] 995 [] 1000 [] 1007 [, ] 1014 NaN 1017 [] 1021 [] 1025 [, ] 1027 [] 1031 [, , , ] 1032 [, v, v] 1033 [, , , , , , II, ] 1035 [] 1038 [, ] 1040 [] 1041 [, , , ] 1050 [, ] 1058 [, ] 1067 [v, ] 1068 [, , , , , , , , , I, ] 1070 [, ] 1071 [] 1073 [, ] 1079 [, ] 1081 [, v] 1085 [] 1086 [] 1088 [, ] 1090 [, ] 1092 [, ] 1095 [] 1101 [] 1122 [] 1129 [, ] 1132 [, , ] 1133 [] 1140 [, , ] 1142 [] 1148 [, xvii, , , , , , , , , , , ] 1150 [, , ] 1154 [, , , , , , , , , , , ] 1156 [, , , , , , , , , , , ] 1162 [, , , , ] 1165 [] 1170 [] 1180 [] 1184 [] 1187 NaN 1192 [, , , , ] 1193 [] 1195 [, v, v, v, v, , , ] 1196 [, v, v, v, v, , , ] 1197 [] 1202 [] 1215 [, , , I] 1218 [, ] 1221 [] 1222 [, v, v] 1227 [, , ] 1228 [, , ] 1230 [] 1231 [, , , , ] 1239 [, , v, v, , ] 1243 [] 1244 [] 1248 [] 1253 [, v, v] 1255 [, , , , , ] 1256 [, , , ] 1262 [, , , v] 1263 [, XX] 1265 [, , II, vi] 1274 [, v, ] 1279 [, , , , iv] 1280 [, , , xxxi] 1293 [, v, , D] 1294 [] 1304 [] 1305 [v, , , , , v, , ] 1319 [, ] 1321 [] 1323 [, , , ] 1327 [v] 1337 [, v, ] 1338 [, , , , , , , , , , , , ] 1342 [, , , v, I, ] 1343 [, ] 1345 [, , , ] 1349 [v, , , , , , , , v] 1350 [v, , , , , , v, , , , , , , , , , ] 1351 [, ] 1354 [, ] 1355 [, , , , , ] 1361 [, , , , , , , , , , , , ] 1363 [, ] 1367 [, , , ] 1373 [, I, , II, , , ] 1379 [] 1385 [, I, , , , , , , , , , II, , , , , , II, , , , , , , II, , , , , , , , , , II, , , , , ] 1387 [, I, ] 1389 [] 1397 [, , , I, , , II, , , , , , , IV, , , , , , , V, ] 1398 [, III, , , V, , ] 1404 [, , , , , , , I, , III, , IV] 1407 [, ] 1409 [, , ] 1410 [, , I, , , II] 1419 [, , , , , , ] 1420 [, , , , , , , , , , , , , ] 1422 [, , ] 1423 [] 1424 [, , I, , , II, , , , I, , , I, , , II, , , I, , , , , II, , , , II] 1427 [, , , , , , , , , , , , , , ] 1432 [, II, , M, , ] 1433 [, , , , , ] 1434 [, , , ] 1440 [, , , , ] 1448 [] 1449 [, , , , , , , , , , ] 1451 [, ] 1454 [, v, , , , , , , , , v, v, , v, , , v, , , , , , ] 1458 [, , , , , , , , , , , , , , , , , , ] 1460 [, , , , ] 1464 [, , , , , , , , d, ] 1465 [, , , , , , ] 1467 [, , ] 1469 [, , , , , , , , , , , I, ] 1470 [, , , , , , , , , , , I, ] 1472 [, , , , , , , , , , , , ] 1474 [, , , , ] 1475 [, , , , , , , , , , XCIV, ] 1476 [] 1477 [, , , , , , , , ] 1479 [I, I, , I, , , , , ] 1483 [, , , , , , , , , , , , ] 1487 [] 1488 [, , , , , , , , , , , , , , , , , , , XXII, XXIII, , , , ] 1491 [] 1494 [, , , ] 1495 [, , , , , ] 1496 [, ] 1499 [, , , , , , ] 1500 [, , , , , , ] 1504 [, , , , , ] 1507 [, ] 1510 [, , vi, , , , , , , ] 1512 [, , , , , I] 1514 [, , , , ] 1515 [, IV, , , , , ] 1516 [, I, , , I, M, ] 1519 [, ] 1520 [, ] 1521 [, , , , ] 1525 [, , , ] 1526 [, , , , , , , , , , I, ] 1527 [, , , , , , , , , , , ] 1531 [, ] 1532 [, ] 1534 [, , ] 1535 [, , ] 1538 [, , ] 1541 [, , , , ] 1543 [, , , ] 1547 [, , I, , , , , , , ] 1549 [, , , , , , , , , , , , ] 1550 [, , ] 1552 [] 1554 [, , , , , , , , , , , , , ] 1555 [, , , , , , , ] 1556 [VII] 1560 [] 1561 [] 1562 [] 1563 [, , , , , , , , , , , ] 1565 [, , I] 1566 [] 1571 [] 1573 [] 1577 [] 1578 [, , , ] 1579 [, , ] 1582 [] 1590 [, , , , , , , M, , , , ] 1592 [] 1593 [] 1594 [, , , , , , , , , ] 1598 [, , ] 1606 [, ] 1607 [] 1608 [] 1613 [] 1622 [] 1624 [, , , , , , , , , , , , , ] 1627 [, , vii, xi, xiv] 1633 [] 1641 [] 1643 [, , ] 1644 [] 1653 [] 1656 [] 1657 [vii, ix, xi] 1658 [] Name: notes, dtype: object
END_footnotes_data_just_roman_numerals = END_footnotes_data_just_footnotes.notes.str.findall(r'(?i)\b(?=[ivxlcdm]+)M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3})\b')
END_footnotes_data_just_digits
230 [64] 240 [] 285 [61] 301 [] 312 [] 319 [] 339 [] 348 [] 370 [3] 390 [] 394 [] 395 [] 403 [] 419 NaN 524 [243] 579 [3, 6, 8, 11, 13, 39, 53, 85, 88, 89, 136, 146, 164] 580 [24, 65] 581 [24, 65] 582 [24, 65] 585 [5, 6] 587 [45, 46, 8, 57, 8, 76, 81, 9, 91, 93, 94, 102, 105, 106, 7, 107, 8, 109, 10, 121, 130, 137, 139,... 588 [45, 46, 8, 57, 8, 76, 81, 9, 91, 93, 94, 102, 105, 106, 7, 107, 8, 109, 10, 121, 130, 137, 139,... 591 [73, 79] 592 [9, 194] 600 [2, 30] 601 [102, 202] 608 [2, 3, 4, 5, 6, 8, 11, 14, 16, 18, 22, 28, 36, 41, 43, 44, 47, 48, 49, 50, 51, 52, 54, 56, 60, 6... 609 [1, 86, 90, 99, 182, 2, 26] 610 [17, 44, 50, 54, 59, 189, 229] 611 [43, 89, 101, 141, 19, 21, 136, 137, 141, 143, 217] 612 [41, 53, 61, 72, 194, 209, 240, 277, 288] 613 [41, 53, 61, 72, 194, 209, 240, 277, 288] 615 [7] 616 [265] 617 [265] 618 [9, 71, 72] 619 [75] 623 [50, 163, 4, 10, 1, 59, 70, 209, 210, 211, 214, 223, 241, 2, 243, 5, 249, 269] 626 [3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 17, 18, 22, 23, 24, 30, 31, 36, 37, 43, 46, 55, 56, 59... 628 [16, 17, 24, 5, 26, 28] 632 [1, 2] 638 [15, 207] 639 [1, 1, 2, 51, 52, 214, 2, 305, 3, 285] 644 [1, 16, 35, 53, 57, 58, 75, 77, 78, 95, 106, 112, 122, 126, 129, 130, 137, 155, 168, 183, 198, 1... 645 [1, 20, 42, 64, 69, 70, 90, 92, 93, 113, 126, 133, 149, 152, 153, 154, 162, 182, 197, 214, 232, ... 646 [1, 20, 42, 64, 69, 70, 90, 92, 93, 113, 126, 133, 149, 152, 153, 154, 162, 182, 197, 214, 232, ... 649 [5] 651 [175, 1, 111, 166, 183, 4, 1, 10, 12, 14, 15, 19, 21, 23, 29, 33, 35, 44, 54, 57, 72, 89, 90, 92... 652 [175, 1, 111, 166, 183, 4, 1, 10, 12, 14, 15, 19, 21, 23, 29, 33, 35, 44, 54, 57, 72, 89, 90, 92... 662 [1, 2, 3, 11, 12, 13, 15, 22, 23, 35, 37, 40, 43, 46, 47, 49, 54, 56, 59, 61, 63, 66, 67, 68, 70... 665 [2, 136, 316, 3, 92, 129] 667 [89] 668 [1, 22, 23, 2, 22, 76, 126, 128, 139, 185, 253, 3, 83, 151, 186, 210, 235, 287, 4, 26, 27, 44] 669 [206] 671 [] 672 [] 673 [179, 4] 677 [100, 119, 126, 178, 185, 191, 225] 708 [1, 2, 3, 11, 12, 13, 15, 22, 23, 35, 37, 40, 43, 46, 47, 49, 54, 56, 59, 61, 63, 66, 67, 68, 70... 711 [2, 136, 316, 3, 92, 129] 713 [89] 714 [1, 22, 23, 2, 22, 76, 126, 128, 139, 185, 253, 3, 83, 151, 186, 210, 235, 287, 4, 26, 27, 44] 715 [206] 717 [] 718 [] 719 [179, 4] 723 [100, 119, 126, 178, 185, 191, 225] 756 NaN 762 [17, 20, 26, 30, 32, 4, 37, 8, 43, 47, 8, 51, 2, 54, 8, 65, 7, 75, 77, 79, 82, 91, 3, 98, 9, 105... 763 [72, 81, 104, 114, 147, 150, 160, 173, 180, 197, 198, 233, 249, 254, 263, 265, 273, 274] 767 NaN 769 [4, 7, 8, 66] 777 [2, 20] 778 [2, 20] 780 [] 783 [] 784 [1, 59, 108, 112, 113, 145, 2, 204, 252, 3, 56, 7, 204] 789 [] 796 [] 809 NaN 810 [] 817 NaN 818 [1, 49, 52, 71, 102, 114, 125, 171, 189, 194, 237, 2, 130, 131, 137, 163, 234] 819 [143, 144, 145, 170, 209, 2, 143, 144, 1, 170, 209] 830 [69, 106, 1, 2] 831 [193, 196, 205, 207] 835 [232] 837 [3, 4, 2] 838 [] 842 NaN 843 [1, 114, 2, 266] 848 [] 854 [107, 173, 177, 178, 180, 185, 186] 855 [2] 862 [] 864 [4, 5, 7, 9, 12, 14, 15, 20, 21, 22, 23, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39,... 865 [1, 5, 7, 9, 11, 3, 15, 8, 20, 22, 7, 29, 30, 32, 42, 49, 53, 57, 62, 9, 71, 3, 76, 80, 83, 85, ... 867 [1, 2, 1, 2] 869 NaN 870 [1, 210] 874 [33, 70, 79, 85] 877 [] 879 [] 880 [49, 50, 51, 54, 55, 56, 57, 58, 62, 65, 67, 68, 70, 71, 76, 77, 81, 83, 87, 92, 97, 98, 100, 10... 881 [216, 286] 883 [] 888 [1, 5, 24, 25, 141, 142, 179, 181, 2, 102, 115, 117, 122, 123, 124, 126, 128, 140, 153, 155, 157... 890 [] 897 [1, 2, 3, 4, 5, 6, 11, 12, 15, 16, 19, 23, 24, 28, 30, 33, 44, 48, 49, 51, 55, 59, 64, 65, 76, 8... 900 [] 904 [33, 119, 179] 905 [] 907 [69, 72, 69] 910 [50, 1] 912 [6, 9, 19, 41, 127, 153] 913 [] 914 [76] 915 [] 919 NaN 921 [1, 29, 1] 924 [] 927 [] 931 NaN 932 [] 933 [] 935 [2, 94, 115, 225] 937 [] 944 NaN 947 [75] 951 [85, 177] 954 [66, 70, 148] 961 [148] 962 [355, 360, 368] 970 [14] 972 [1, 2, 3] 983 [47] 985 [50, 51, 113] 987 [246] 989 [90, 120] 992 [147] 993 [48, 50, 138, 214] 995 [68, 104, 129, 140] 1000 [] 1007 [204, 215] 1014 NaN 1017 [] 1021 [37, 38, 72] 1025 [88, 98, 104, 164] 1027 [247] 1031 [] 1032 [7, 369, 375, 378, 8, 385, 387] 1033 [153] 1035 [] 1038 [32] 1040 [9, 24] 1041 [17, 49, 56, 64] 1050 [32] 1058 [] 1067 [65, 228, 1] 1068 [1, 78, 9] 1070 [107, 109, 123, 137, 158, 163, 164, 165, 171] 1071 [40, 142] 1073 [] 1079 [] 1081 [1] 1085 [62, 81, 93, 96, 98, 145, 146, 172, 178, 179, 191, 193, 194, 211, 226, 236, 254, 311, 312, 383, ... 1086 [245] 1088 [233] 1090 [] 1092 [] 1095 [] 1101 [16, 20, 21, 26, 28, 51, 52, 66, 69, 78, 79, 86, 99, 102, 111, 114, 116, 7, 129, 132] 1122 [5, 13, 35] 1129 [170, 213] 1132 [120] 1133 [] 1140 [40] 1142 [402] 1148 [] 1150 [] 1154 [13] 1156 [13] 1162 [] 1165 [] 1170 [] 1180 [14, 29, 74, 170] 1184 [] 1187 NaN 1192 [361, 409] 1193 [] 1195 [44, 72, 145, 2, 290, 1, 3, 258, 4, 3] 1196 [44, 72, 145, 2, 290, 1, 3, 258, 4, 3] 1197 [148, 185] 1202 [86] 1215 [] 1218 [] 1221 [135, 144, 147, 212, 218] 1222 [1, 11, 14, 20, 39, 56, 86, 93, 95, 100, 106, 126, 127, 130, 2, 11, 57, 72, 73, 84, 85, 120] 1227 [] 1228 [] 1230 [42, 3, 56, 175, 180, 2, 184] 1231 [43, 45] 1239 [1, 3, 4] 1243 [350] 1244 [] 1248 [] 1253 [282, 423, 425, 1, 152, 161, 162, 163, 171, 235, 237, 238, 243, 522, 2] 1255 [] 1256 [] 1262 [3, 289] 1263 [113] 1265 [100, 10] 1274 [38, 1] 1279 [24] 1280 [108, 158] 1293 [124, 2] 1294 [164] 1304 [186, 255] 1305 [49, 3, 213, 4] 1319 [58] 1321 [220] 1323 [54] 1327 [37, 2] 1337 [58, 1] 1338 [291] 1342 [4, 1] 1343 [] 1345 [75] 1349 [22, 1, 2] 1350 [16, 1, 3, 2] 1351 [67] 1354 [121] 1355 [142] 1361 [4, 132, 161, 9, 10, 122, 129, 86, 142, 152, 804, 177] 1363 [59] 1367 [] 1373 [82, 253] 1379 [70, 174] 1385 [286, 13, 145, 174, 182] 1387 [57] 1389 [19] 1397 [22, 49, 133, 166, 167, 82] 1398 [354, 312, 482] 1404 [12, 19, 28, 30, 48, 256, 257, 259, 258, 290, 291, 46, 47, 285, 288] 1407 [216, 224, 235, 237, 244, 247, 248, 256, 258, 266, 269, 271, 272, 273, 274, 276, 277, 294, 295, ... 1409 [4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 16, 17, 18, 19, 20, 21, 22, 24, 25, 28, 29, 30, 31, 32, 33, 3... 1410 [18, 22, 71, 128, 3, 11, 18, 37, 129, 155, 156] 1419 [] 1420 [1, 5, 15, 18, 19, 20, 21, 26, 35, 44, 45, 48, 68, 71, 73, 74, 77, 78, 79, 86, 87, 92, 99, 100, ... 1422 [28, 31, 36, 49, 61, 65, 109, 113, 120, 188, 39, 54, 237, 257, 62, 93, 101, 62, 65, 197] 1423 [9, 10, 11, 19, 21, 23, 4, 28, 9, 32, 37, 40, 1, 43, 50, 2, 61, 64, 79, 94, 5, 97, 115, 123, 4, ... 1424 [4, 206, 207, 95, 121, 125, 207, 209, 66, 69, 127, 241, 211, 47, 48, 69, 225, 238, 239, 137] 1427 [8, 9, 46, 47] 1432 [75] 1433 [98, 124, 138, 187, 298, 218, 328, 438] 1434 [200] 1440 [119] 1448 [59] 1449 [87] 1451 [] 1454 [21, 2, 52, 2, 78, 2, 145, 2, 166, 2] 1458 [148, 148, 218, 224, 225, 227, 380, 435, 493] 1460 [57, 58] 1464 [] 1465 [] 1467 [149] 1469 [9, 67] 1470 [9, 67] 1472 [] 1474 [2] 1475 [20] 1476 [58] 1477 [113] 1479 [26] 1483 [98, 288] 1487 [234, 178, 9] 1488 [] 1491 [323] 1494 [113] 1495 [192] 1496 [130] 1499 [343, 343] 1500 [343, 343] 1504 [189] 1507 [193] 1510 [] 1512 [139, 140] 1514 [237] 1515 [24, 82] 1516 [163] 1519 [68] 1520 [213] 1521 [82] 1525 [189] 1526 [8, 179, 3, 196, 211, 212, 215, 218, 227, 233] 1527 [255] 1531 [235] 1532 [235] 1534 [189] 1535 [189] 1538 [373] 1541 [101] 1543 [92] 1547 [145, 146] 1549 [3, 30, 5] 1550 [173] 1552 [341, 342, 343, 344, 346, 348, 350, 351, 352, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, ... 1554 [9, 17] 1555 [114] 1556 [6, 9, 28, 42, 48, 49, 50, 57, 58, 61, 65, 68, 71, 73, 77, 82, 85, 86, 87, 88, 90, 92, 94, 95, 9... 1560 [2, 5, 9, 12, 17, 21, 28, 29, 36, 37, 38, 39, 49, 52, 55, 60, 61, 65, 108, 109, 112, 115, 121, 1... 1561 [2, 5, 9, 12, 17, 21, 28, 29, 36, 37, 38, 39, 49, 52, 55, 60, 61, 65, 108, 109, 112, 115, 121, 1... 1562 [65, 99, 112, 134, 182, 188, 189] 1563 [23] 1565 [93] 1566 [168, 201] 1571 [108, 112, 117, 164, 195] 1573 [87, 171] 1577 [79] 1578 [15] 1579 [] 1582 [173] 1590 [53] 1592 [35, 49, 56, 67, 70, 80, 85] 1593 [35, 49, 56, 67, 70, 80, 85] 1594 [43] 1598 [128] 1606 [5] 1607 [83] 1608 [7, 8, 39] 1613 [1, 9, 38, 127, 147, 163, 164, 165, 166, 176, 7, 178, 181, 184, 185, 186, 187, 189, 192, 193] 1622 [40] 1624 [153] 1627 [] 1633 [137, 153, 164, 167, 169, 176, 178, 188, 192, 222] 1641 [6, 59, 87, 145, 365] 1643 [3, 4, 5, 6, 8, 10, 11, 13, 14, 15] 1644 [55, 242] 1653 [137, 153, 164, 167, 169, 176, 178, 188, 192, 222] 1656 [45, 57, 58, 67, 105, 127, 195] 1657 [] 1658 [434] Name: notes, dtype: object
END_footnotes_data_just_digits.to_csv('END_footnotes_data_just_digits.csv')
END_footnotes_data_just_digits_cleaned = pd.read_csv('END_footnotes_data_just_digits-cleaned.csv', encoding='utf-8')
END_footnotes_data_just_digits_cleaned['notes'] = END_footnotes_data_just_digits_cleaned.notes.dropna()
END_footnotes_data_just_digits_cleaned['notes'] = END_footnotes_data_just_digits_cleaned.notes.dropna().astype(int)
END_footnotes_data_just_digits_cleaned = END_footnotes_data_just_digits_cleaned.dropna()
END_footnotes_data_just_digits_cleaned['notes'] = END_footnotes_data_just_digits_cleaned['notes'].astype(int)
END_footnotes_data_just_digits_cleaned
| Column | notes | |
|---|---|---|
| 0 | 230 | 64 |
| 2 | 285 | 61 |
| 8 | 370 | 3 |
| 14 | 524 | 243 |
| 15 | 579 | 3 |
| ... | ... | ... |
| 2678 | 1656 | 67 |
| 2679 | 1656 | 105 |
| 2680 | 1656 | 127 |
| 2681 | 1656 | 195 |
| 2683 | 1658 | 434 |
2599 rows × 2 columns
# Import in file cleaned in open refine to r# Import in file cleaned in open refine to remove uppercase roman numerals (volume numbers) and extra characters
END_footnotes_data_just_roman_numerals.to_csv('END_footnotes_data_roman_numerals.csv')
END_footnotes_data_just_roman_numerals_cleaned = pd.read_csv('END_footnotes_data_roman_numerals-cleaned.csv', encoding='utf-8')
END_footnotes_data_just_roman_numerals_cleaned = END_footnotes_data_just_roman_numerals_cleaned.dropna()
# Notice how 'v' is much higher–– this is likely due to END catalogers writing "v.1" or v2"
END_footnotes_data_just_roman_numerals_cleaned['notes'].value_counts().head(5)
v 83 vii 6 iv 5 xi 5 vi 4 Name: notes, dtype: int64
# Dropping the "v's" from our roman numerals data, since the majority of these refer to VOLUME not page numbers
END_footnotes_data_just_roman_numerals_cleaned = END_footnotes_data_just_roman_numerals_cleaned[END_footnotes_data_just_roman_numerals_cleaned['notes'] != 'v']
END_footnotes_data_just_digits_cleaned.notes.dtype
dtype('int64')
END_footnotes_data_just_roman_numerals_cleaned.head()
| Column | notes | |
|---|---|---|
| 0 | 585 | vii |
| 1 | 585 | xii |
| 2 | 587 | iv |
| 4 | 587 | xi |
| 5 | 587 | xii |
END_footnotes_data_all_page_numbers_cleaned = END_footnotes_data_just_digits_cleaned.append(END_footnotes_data_just_roman_numerals_cleaned)
/var/folders/hg/n067xqnn1nbbk0txk1mdhcq80000gn/T/ipykernel_81952/417160157.py:1: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. END_footnotes_data_all_page_numbers_cleaned = END_footnotes_data_just_digits_cleaned.append(END_footnotes_data_just_roman_numerals_cleaned)
END_footnotes_data_all_page_numbers_cleaned
| Column | notes | |
|---|---|---|
| 0 | 230 | 64 |
| 2 | 285 | 61 |
| 8 | 370 | 3 |
| 14 | 524 | 243 |
| 15 | 579 | 3 |
| ... | ... | ... |
| 154 | 1627 | xi |
| 155 | 1627 | xiv |
| 156 | 1657 | vii |
| 157 | 1657 | ix |
| 158 | 1657 | xi |
2675 rows × 2 columns
print("Page Number, count")
END_footnotes_data_all_page_numbers_cleaned.notes.value_counts()
Page Number, count
2 54 1 50 3 39 8 33 5 31 4 29 9 28 7 22 49 17 178 16 65 15 22 15 153 15 72 15 126 15 137 15 148 14 70 14 145 13 57 13 15 13 61 13 189 13 37 13 11 13 6 13 35 12 214 12 56 12 44 12 54 12 113 12 50 12 142 12 129 12 182 12 28 12 185 12 24 12 164 12 20 12 59 12 43 11 79 11 30 11 216 11 67 11 92 11 130 11 86 11 218 11 85 11 232 11 21 11 173 11 128 11 90 11 10 11 23 11 127 10 209 10 225 10 235 10 12 10 69 10 217 10 211 10 186 10 19 10 76 10 81 10 166 10 177 10 75 10 207 10 194 9 183 9 58 9 100 9 149 9 94 9 93 9 155 9 124 9 40 9 82 9 46 9 132 9 89 9 17 9 29 9 13 9 26 9 87 9 51 9 179 9 14 9 174 9 192 9 144 9 114 9 203 9 139 8 224 8 47 8 222 8 273 8 184 8 48 8 101 8 200 8 32 8 39 8 42 8 171 8 193 8 68 8 119 8 115 8 41 8 98 8 71 8 62 8 199 8 202 8 52 8 175 8 147 8 112 8 210 7 143 7 152 7 55 7 163 7 64 7 197 7 95 7 140 7 45 7 91 7 102 7 120 7 106 7 109 7 167 7 108 7 213 7 223 7 36 7 78 7 16 7 104 7 170 7 205 7 111 7 66 7 233 7 33 7 77 6 265 6 226 6 135 6 188 6 262 6 141 6 123 6 286 6 258 6 122 6 204 6 133 6 269 6 138 6 236 6 291 6 83 6 99 6 38 6 343 6 146 6 156 6 150 6 vii 6 121 6 245 6 247 6 107 6 254 6 191 6 136 6 18 6 180 6 88 6 53 6 288 6 206 6 277 5 169 5 198 5 355 5 240 5 xi 5 iv 5 256 5 234 5 249 5 248 5 257 5 260 5 280 5 201 5 317 5 131 5 97 5 181 5 255 5 243 5 105 5 116 5 134 5 237 5 176 5 319 5 229 5 158 5 117 5 73 5 160 5 157 5 60 4 312 4 195 4 304 4 238 4 187 4 159 4 253 4 154 4 162 4 96 4 103 4 282 4 294 4 vi 4 227 4 168 4 208 4 125 4 221 4 215 4 63 4 80 4 278 4 354 4 298 4 350 4 xii 4 220 4 31 4 337 4 359 4 84 4 328 4 259 4 74 4 ii 4 285 4 361 3 324 3 xxi 3 363 3 266 3 161 3 289 3 196 3 316 3 335 3 274 3 320 3 287 3 27 3 272 3 318 3 279 3 110 3 281 3 402 3 d 3 ix 3 xxxi 3 228 3 360 3 334 3 250 3 270 3 276 3 261 3 212 3 336 3 246 3 25 3 244 3 344 3 352 3 239 3 342 3 290 3 xxx 3 172 2 165 2 348 2 264 2 267 2 284 2 365 2 383 2 373 2 323 2 380 2 346 2 251 2 271 2 406 2 310 2 viii 2 xx 2 x 2 434 2 xvii 2 xiv 2 xxvii 2 xxvi 2 xxiv 2 xxii 2 378 2 332 2 331 2 409 2 295 2 296 2 299 2 306 2 308 2 309 2 493 2 330 2 305 2 375 2 376 2 435 2 432 2 431 2 418 2 416 2 415 2 404 2 401 2 397 2 391 2 390 2 389 2 385 2 371 2 496 2 369 2 362 2 358 2 356 2 353 2 351 2 345 2 327 2 326 2 315 2 311 2 292 2 242 2 448 2 398 2 252 2 506 2 368 2 263 2 498 2 151 2 118 2 230 2 219 2 241 2 433 1 510 1 511 1 524 1 388 1 423 1 475 1 426 1 504 1 417 1 414 1 411 1 410 1 408 1 407 1 405 1 403 1 400 1 399 1 396 1 424 1 480 1 494 1 xxxix 1 c 1 231 1 lxviii 1 lxiii 1 lvii 1 lvi 1 li 1 xlix 1 xlv 1 xlii 1 xxxvii 1 490 1 xxxvi 1 268 1 xv 1 xiii 1 34 1 453 1 464 1 338 1 340 1 394 1 395 1 386 1 393 1 436 1 321 1 313 1 307 1 302 1 300 1 297 1 293 1 482 1 283 1 503 1 502 1 440 1 421 1 441 1 190 1 442 1 444 1 314 1 460 1 463 1 465 1 467 1 303 1 301 1 322 1 325 1 474 1 364 1 384 1 382 1 381 1 379 1 377 1 374 1 372 1 370 1 367 1 366 1 392 1 357 1 804 1 341 1 425 1 387 1 438 1 347 1 333 1 522 1 349 1 509 1 505 1 329 1 i 1 Name: notes, dtype: int64
ax = END_footnotes_data_just_digits_cleaned.notes.value_counts().head(25).plot(kind='bar', title ='Top 25 locations for footnotes in END volumes', figsize=(10,8))
ax.set_xlabel("Page Number")
ax.set_ylabel("Count of footnotes present")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
print("Top 25 locations for footnotes in END volumes")
print("")
print("Page Number, count")
END_footnotes_data_all_page_numbers_cleaned.notes.value_counts().head(25)
Top 25 locations for footnotes in END volumes Page Number, count
2 54 1 50 3 39 8 33 5 31 4 29 9 28 7 22 49 17 178 16 65 15 22 15 153 15 72 15 126 15 137 15 148 14 70 14 145 13 57 13 15 13 61 13 189 13 37 13 11 13 Name: notes, dtype: int64
# It looks like the first 10 pages have higher amounts of recorded footnotes-–let's confirm this
# Let's create a new data column binning our footnote data into 10-page chunks and catch-all categories for volumes with 300+pages
page_number_categories = []
for page_number in END_footnotes_data_all_page_numbers_cleaned['notes']:
if type(page_number) == int:
if 0 < page_number < 10:
page_number_categories.append("001s")
elif 20 > page_number >= 10:
page_number_categories.append("010s")
elif 30 > page_number >= 20:
page_number_categories.append("020s")
elif 40 > page_number >= 30:
page_number_categories.append("030s")
elif 50 > page_number >= 40:
page_number_categories.append("040s")
elif 60 > page_number >= 50:
page_number_categories.append("050s")
elif 70 > page_number >= 60:
page_number_categories.append("060s")
elif 80 > page_number >= 70:
page_number_categories.append("070s")
elif 90 > page_number >= 80:
page_number_categories.append("080s")
elif 100 > page_number >= 90:
page_number_categories.append("090s")
elif 110 > page_number >= 100:
page_number_categories.append("100s")
elif 120 > page_number >= 110:
page_number_categories.append("110s")
elif 130 > page_number >= 120:
page_number_categories.append("120s")
elif 140 > page_number >= 130:
page_number_categories.append("130s")
elif 150 > page_number >= 140:
page_number_categories.append("140s")
elif 160 > page_number >= 150:
page_number_categories.append("150s")
elif 170 > page_number >= 160:
page_number_categories.append("160s")
elif 180 > page_number >= 170:
page_number_categories.append("170s")
elif 190 > page_number >= 180:
page_number_categories.append("180s")
elif 200 > page_number >= 190:
page_number_categories.append("190s")
elif 210 > page_number >= 200:
page_number_categories.append("200s")
elif 220 > page_number >= 210:
page_number_categories.append("210s")
elif 230 > page_number >= 220:
page_number_categories.append("220s")
elif 240 > page_number >= 230:
page_number_categories.append("230s")
elif 250 > page_number >= 240:
page_number_categories.append("240s")
elif 260 > page_number >= 250:
page_number_categories.append("250s")
elif 270 > page_number >= 260:
page_number_categories.append("260s")
elif 280 > page_number >= 270:
page_number_categories.append("270s")
elif 290 > page_number >= 280:
page_number_categories.append("280s")
elif 300 > page_number >= 290:
page_number_categories.append("290s")
elif 310 > page_number >= 300:
page_number_categories.append("300s")
elif 320 > page_number >= 310:
page_number_categories.append("310s")
elif 330 > page_number >= 320:
page_number_categories.append("320s")
elif 340 > page_number >= 330:
page_number_categories.append("330s")
elif 350 > page_number >= 340:
page_number_categories.append("340s")
elif 360 > page_number >= 350:
page_number_categories.append("350s")
elif 370 > page_number >= 360:
page_number_categories.append("360s")
elif 380 > page_number >= 370:
page_number_categories.append("370s")
elif 390 > page_number >= 380:
page_number_categories.append("380s")
elif 400 > page_number >= 390:
page_number_categories.append("390s")
elif 500 > page_number >= 400:
page_number_categories.append("400s")
elif 600 > page_number >= 500:
page_number_categories.append("500s")
elif page_number >= 600:
page_number_categories.append("600s+")
else:
page_number_categories.append("000-Frontmatter")
END_footnotes_data_all_page_numbers_cleaned['page_number_categories'] = page_number_categories
ax = END_footnotes_data_all_page_numbers_cleaned.page_number_categories.value_counts()\
.plot(kind='bar', rot=45, figsize=(18,5), title="Locations for footnotes in END Volumes, binning by every 10 pages")
ax.set_xlabel("Page numbers footnote appears on, by 10s")
ax.set_ylabel("Count of footnotes present")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))
NOTES ON THE ABOVE CHART: The above numbers need to be qualified somewhat: 76 of the page numbers that appear to be in the "1s" category are the product of volume numbers in the cataloger notes -- eg "v.3" (see full text of cataloger notes here).
This would mean there are 223 (not 299) footnotes that appear in pages 1-10.
Even so, if we include the number of footnotes that appear on pages paginated with Roman numerals (appearing in the prefatory matter), that would be 299 footnotes that appear in the first ten pages and in the prefatory material of the novel, three times the amount that appear in any other 10 page chunk of the text.
ax = END_footnotes_data_all_page_numbers_cleaned.page_number_categories.value_counts().sort_index()\
.plot(kind='bar', rot=45, figsize=(20,5), title="Locations for footnotes in END Volumes, binning by every 10 pages, sorted by place in novel")
ax.set_xlabel("Page numbers footnote appears on, by 10s")
ax.set_ylabel("Count of footnotes present")
for p in ax.patches:
ax.annotate(str(p.get_height()), (p.get_x() * 1.005, p.get_height() * 1.005))